diff --git a/packages/rancher-kube-state-metrics/generated-changes/patch/Chart.yaml.patch b/packages/rancher-kube-state-metrics/generated-changes/patch/Chart.yaml.patch
index b4ee2f0666..f21f05fd3b 100644
--- a/packages/rancher-kube-state-metrics/generated-changes/patch/Chart.yaml.patch
+++ b/packages/rancher-kube-state-metrics/generated-changes/patch/Chart.yaml.patch
@@ -7,7 +7,7 @@
 +  catalog.rancher.io/release-name: rancher-kube-state-metrics
 +  catalog.cattle.io/hidden: "true"
 +  catalog.cattle.io/os: linux
- apiVersion: v1
+ apiVersion: v2
 -name: kube-state-metrics
 +name: rancher-kube-state-metrics
  description: Install kube-state-metrics to generate and expose cluster-level metrics
diff --git a/packages/rancher-kube-state-metrics/generated-changes/patch/templates/deployment.yaml.patch b/packages/rancher-kube-state-metrics/generated-changes/patch/templates/deployment.yaml.patch
index f3598e9a5c..881673c4de 100644
--- a/packages/rancher-kube-state-metrics/generated-changes/patch/templates/deployment.yaml.patch
+++ b/packages/rancher-kube-state-metrics/generated-changes/patch/templates/deployment.yaml.patch
@@ -16,8 +16,8 @@
 +          image: "{{ template "system_default_registry" . }}{{ .Values.image.repository }}:{{ .Values.image.tag }}"
            ports:
            - containerPort: 8080
-           livenessProbe:
-@@ -200,12 +201,12 @@
+           {{- if .Values.selfMonitor.enabled }}
+@@ -207,12 +208,12 @@
            affinity:
 {{ toYaml .Values.affinity | indent 8 }}
       {{- end }}
diff --git a/packages/rancher-kube-state-metrics/generated-changes/patch/values.yaml.patch b/packages/rancher-kube-state-metrics/generated-changes/patch/values.yaml.patch
index f86e310a71..9bbec02b8f 100644
--- a/packages/rancher-kube-state-metrics/generated-changes/patch/values.yaml.patch
+++ b/packages/rancher-kube-state-metrics/generated-changes/patch/values.yaml.patch
@@ -10,7 +10,7 @@
  image:
 -  repository: k8s.gcr.io/kube-state-metrics/kube-state-metrics
 +  repository: rancher/mirrored-kube-state-metrics-kube-state-metrics
-   tag: v1.9.8
+   tag: v2.0.0
    pullPolicy: IfNotPresent
@@ -84,6 +88,7 @@
diff --git a/packages/rancher-kube-state-metrics/package.yaml b/packages/rancher-kube-state-metrics/package.yaml
index 6e0b1c6d25..b4475ee6f8 100644
--- a/packages/rancher-kube-state-metrics/package.yaml
+++ b/packages/rancher-kube-state-metrics/package.yaml
@@ -1,4 +1,4 @@
-url: https://github.com/kubernetes/kube-state-metrics.git
+url: https://github.com/prometheus-community/helm-charts.git
 subdirectory: charts/kube-state-metrics
-commit: bd6335b43ef3eb97639dc41dd605dc12422f60b6
+commit: 086f1f7f0870e110abf30aa6bfe7c141e83cc950
 version: 100.0.0
diff --git a/packages/rancher-monitoring/generated-changes/exclude/Chart.lock b/packages/rancher-monitoring/generated-changes/exclude/Chart.lock
index b30d86f472..b6d6f97d95 100644
--- a/packages/rancher-monitoring/generated-changes/exclude/Chart.lock
+++ b/packages/rancher-monitoring/generated-changes/exclude/Chart.lock
@@ -1,12 +1,12 @@
 dependencies:
 - name: kube-state-metrics
-  repository: https://kubernetes.github.io/kube-state-metrics
-  version: 2.13.0
+  repository: https://prometheus-community.github.io/helm-charts
+  version: 3.1.1
 - name: prometheus-node-exporter
   repository: https://prometheus-community.github.io/helm-charts
-  version: 1.16.2
+  version: 1.18.1
 - name: grafana
   repository: https://grafana.github.io/helm-charts
-  version: 6.6.3
-digest: sha256:52acbef377da70248ae3fa926dc7f6601df9022b1b1e17224a8fe99e6995d3af
-generated: "2021-03-19T17:50:36.8566658+01:00"
+  version: 6.12.0
+digest: sha256:11886645ff1ade77d0fefdca90afba4a92f2b535997280074a59828e8d1dab4e
+generated: "2021-06-09T16:56:40.364303181+02:00"
diff --git a/packages/rancher-monitoring/generated-changes/exclude/hack/requirements.txt b/packages/rancher-monitoring/generated-changes/exclude/hack/requirements.txt
index bc2fde78b5..91a83073ce 100644
--- a/packages/rancher-monitoring/generated-changes/exclude/hack/requirements.txt
+++ b/packages/rancher-monitoring/generated-changes/exclude/hack/requirements.txt
@@ -1,2 +1,2 @@
-PyYAML==5.1.2
+PyYAML==5.4
 requests==2.22.0
diff --git a/packages/rancher-monitoring/generated-changes/exclude/hack/sync_grafana_dashboards.py b/packages/rancher-monitoring/generated-changes/exclude/hack/sync_grafana_dashboards.py
index 6777d8287a..860f5a7039 100644
--- a/packages/rancher-monitoring/generated-changes/exclude/hack/sync_grafana_dashboards.py
+++ b/packages/rancher-monitoring/generated-changes/exclude/hack/sync_grafana_dashboards.py
@@ -32,25 +32,11 @@ def new_representer(dumper, data):
         'min_kubernetes': '1.14.0-0'
     },
     {
-        'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/docs/current/op-guide/grafana.json',
+        'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/grafana.json',
         'destination': '../templates/grafana/dashboards-1.14',
         'type': 'json',
         'min_kubernetes': '1.14.0-0'
     },
-    {
-        'source': 'https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/grafana-dashboardDefinitions.yaml',
-        'destination': '../templates/grafana/dashboards',
-        'type': 'yaml',
-        'min_kubernetes': '1.10.0-0',
-        'max_kubernetes': '1.14.0-0'
-    },
-    {
-        'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/docs/current/op-guide/grafana.json',
-        'destination': '../templates/grafana/dashboards',
-        'type': 'json',
-        'min_kubernetes': '1.10.0-0',
-        'max_kubernetes': '1.14.0-0'
-    },
 ]
 
 # Additional conditions map
@@ -74,7 +60,7 @@ def new_representer(dumper, data):
 https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
 */ -}}
 {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
-{{- if and (semverCompare ">=%(min_kubernetes)s" $kubeTargetVersion) (semverCompare "<%(max_kubernetes)s" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled%(condition)s }}
+{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=%(min_kubernetes)s" $kubeTargetVersion) (semverCompare "<%(max_kubernetes)s" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled%(condition)s }}
 apiVersion: v1
 kind: ConfigMap
 metadata:
diff --git a/packages/rancher-monitoring/generated-changes/exclude/hack/sync_prometheus_rules.py b/packages/rancher-monitoring/generated-changes/exclude/hack/sync_prometheus_rules.py
index c35cd0baa6..59c711357e 100644
--- a/packages/rancher-monitoring/generated-changes/exclude/hack/sync_prometheus_rules.py
+++ b/packages/rancher-monitoring/generated-changes/exclude/hack/sync_prometheus_rules.py
@@ -30,7 +30,7 @@ def new_representer(dumper, data):
         'min_kubernetes': '1.14.0-0'
     },
     {
-        'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/docs/v3.4.0/op-guide/etcd3_alert.rules.yml',
+        'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml',
         'destination': '../templates/prometheus/rules-1.14',
         'min_kubernetes': '1.14.0-0'
     },
@@ -41,7 +41,7 @@ def new_representer(dumper, data):
         'max_kubernetes': '1.14.0-0'
     },
     {
-        'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/docs/v3.4.0/op-guide/etcd3_alert.rules.yml',
+        'source': 'https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml',
         'destination': '../templates/prometheus/rules',
         'min_kubernetes': '1.10.0-0',
         'max_kubernetes': '1.14.0-0'
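Note on the regenerated guard above: every file these sync scripts emit now opens with the "or .Values.grafana.enabled .Values.grafana.forceDeployDashboards" condition, so dashboard ConfigMaps can render even when the chart's bundled Grafana is disabled. A minimal values sketch, using only the value names that appear in the condition (the combination shown is illustrative, not taken from this PR):

    grafana:
      enabled: false               # do not deploy the bundled Grafana
      forceDeployDashboards: true  # but still render the dashboard ConfigMaps
      defaultDashboardsEnabled: true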
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/alertmanager.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/alertmanager.rules.yaml
new file mode 100644
index 0000000000..71159849c0
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/alertmanager.rules.yaml
@@ -0,0 +1,63 @@
+{{- /*
+Generated from 'alertmanager.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }}
+{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
+{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
+{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: alertmanager.rules
+    rules:
+    - alert: AlertmanagerConfigInconsistent
+      annotations:
+        message: The configuration of the instances of the Alertmanager cluster `{{`{{`}}$labels.service{{`}}`}}` are out of sync.
+      expr: count_values("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="{{ $operatorJob }}",namespace="{{ $namespace }}",controller="alertmanager"}) by (name, job, namespace, controller), "service", "$1", "name", "(.*)") != 1
+      for: 5m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: AlertmanagerFailedReload
+      annotations:
+        message: Reloading Alertmanager's configuration has failed for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}.
+      expr: alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 0
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: AlertmanagerMembersInconsistent
+      annotations:
+        message: Alertmanager has not found all other members of the cluster.
+      expr: |-
+        alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"}
+          != on (service) GROUP_LEFT()
+        count by (service) (alertmanager_cluster_members{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"})
+      for: 5m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
\ No newline at end of file
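The {{`{{`}} / {{`}}`}} pairs in the file above escape Go-template braces so that Helm emits them literally for Prometheus's own alert templating. As a sketch, assuming a hypothetical release named kube-prometheus-stack installed in the monitoring namespace, the AlertmanagerFailedReload rule would render roughly as:

    - alert: AlertmanagerFailedReload
      annotations:
        message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod }}.
      expr: alertmanager_config_last_reload_successful{job="kube-prometheus-stack-alertmanager",namespace="monitoring"} == 0
      for: 10m
      labels:
        severity: warning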
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/etcd.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/etcd.yaml
new file mode 100644
index 0000000000..ce4e87bf45
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/etcd.yaml
@@ -0,0 +1,179 @@
+{{- /*
+Generated from 'etcd' group from https://raw.githubusercontent.com/etcd-io/website/master/content/en/docs/v3.4/op-guide/etcd3_alert.rules.yml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: etcd
+    rules:
+    - alert: etcdInsufficientMembers
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).'
+      expr: sum(up{job=~".*etcd.*"} == bool 1) by (job) < ((count(up{job=~".*etcd.*"}) by (job) + 1) / 2)
+      for: 3m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdNoLeader
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.'
+      expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
+      for: 1m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHighNumberOfLeaderChanges
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": instance {{`{{`}} $labels.instance {{`}}`}} has seen {{`{{`}} $value {{`}}`}} leader changes within the last hour.'
+      expr: rate(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}[15m]) > 3
+      for: 15m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHighNumberOfFailedGRPCRequests
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+      expr: |-
+        100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
+          /
+        sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
+          > 1
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHighNumberOfFailedGRPCRequests
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+      expr: |-
+        100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code!="OK"}[5m])) BY (job, instance, grpc_service, grpc_method)
+          /
+        sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) BY (job, instance, grpc_service, grpc_method)
+          > 5
+      for: 5m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdGRPCRequestsSlow
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": gRPC requests to {{`{{`}} $labels.grpc_method {{`}}`}} are taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+      expr: |-
+        histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_type="unary"}[5m])) by (job, instance, grpc_service, grpc_method, le))
+        > 0.15
+      for: 10m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdMemberCommunicationSlow
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+      expr: |-
+        histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
+        > 0.15
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHighNumberOfFailedProposals
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last hour on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+      expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
+      for: 15m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHighFsyncDurations
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+      expr: |-
+        histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+        > 0.5
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHighCommitDurations
+      annotations:
+        message: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+      expr: |-
+        histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+        > 0.25
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHighNumberOfFailedHTTPRequests
+      annotations:
+        message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}'
+      expr: |-
+        sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
+        BY (method) > 0.01
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHighNumberOfFailedHTTPRequests
+      annotations:
+        message: '{{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.'
+      expr: |-
+        sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
+        BY (method) > 0.05
+      for: 10m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: etcdHTTPRequestsSlow
+      annotations:
+        message: etcd instance {{`{{`}} $labels.instance {{`}}`}} HTTP requests to {{`{{`}} $labels.method {{`}}`}} are slow.
+      expr: |-
+        histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))
+        > 0.15
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
\ No newline at end of file
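The etcdInsufficientMembers expression above encodes the quorum rule: the alert fires when the number of live members drops below (n + 1) / 2. A worked example:

    # 3-member cluster, only 1 member up:
    #   sum(up{job=~".*etcd.*"} == bool 1) = 1
    #   (count(up{job=~".*etcd.*"}) + 1) / 2 = (3 + 1) / 2 = 2
    # 1 < 2 holds for 3m, so etcdInsufficientMembers fires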
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/general.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/general.rules.yaml
new file mode 100644
index 0000000000..cde6feb5c9
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/general.rules.yaml
@@ -0,0 +1,56 @@
+{{- /*
+Generated from 'general.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: general.rules
+    rules:
+    - alert: TargetDown
+      annotations:
+        message: '{{`{{`}} $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}} targets are down.'
+      expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: Watchdog
+      annotations:
+        message: 'This is an alert meant to ensure that the entire alerting pipeline is functional.
+
+          This alert is always firing, therefore it should always be firing in Alertmanager
+
+          and always fire against a receiver. There are integrations with various notification
+
+          mechanisms that send a notification when this alert is not firing. For example the
+
+          "DeadMansSnitch" integration in PagerDuty.
+
+          '
+      expr: vector(1)
+      labels:
+        severity: none
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/k8s.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/k8s.rules.yaml
new file mode 100644
index 0000000000..08aa7fe2b3
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/k8s.rules.yaml
@@ -0,0 +1,83 @@
+{{- /*
+Generated from 'k8s.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8s }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: k8s.rules
+    rules:
+    - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace)
+      record: namespace:container_cpu_usage_seconds_total:sum_rate
+    - expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace)
+      record: namespace:container_memory_usage_bytes:sum
+    - expr: |-
+        sum by (namespace, pod_name, container_name) (
+          rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])
+        )
+      record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate
+    - expr: |-
+        sum by(namespace) (
+          kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
+            * on (endpoint, instance, job, namespace, pod, service)
+          group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
+        )
+      record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
+    - expr: |-
+        sum by (namespace) (
+          kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
+            * on (endpoint, instance, job, namespace, pod, service)
+          group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)
+        )
+      record: namespace_name:kube_pod_container_resource_requests_cpu_cores:sum
+    - expr: |-
+        sum(
+          label_replace(
+            label_replace(
+              kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
+              "replicaset", "$1", "owner_name", "(.*)"
+            ) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
+            "workload", "$1", "owner_name", "(.*)"
+          )
+        ) by (namespace, workload, pod)
+      labels:
+        workload_type: deployment
+      record: mixin_pod_workload
+    - expr: |-
+        sum(
+          label_replace(
+            kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
+            "workload", "$1", "owner_name", "(.*)"
+          )
+        ) by (namespace, workload, pod)
+      labels:
+        workload_type: daemonset
+      record: mixin_pod_workload
+    - expr: |-
+        sum(
+          label_replace(
+            kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
+            "workload", "$1", "owner_name", "(.*)"
+          )
+        ) by (namespace, workload, pod)
+      labels:
+        workload_type: statefulset
+      record: mixin_pod_workload
+{{- end }}
\ No newline at end of file
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-apiserver.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-apiserver.rules.yaml
new file mode 100644
index 0000000000..e3a9296923
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-apiserver.rules.yaml
@@ -0,0 +1,39 @@
+{{- /*
+Generated from 'kube-apiserver.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserver }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver.rules" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: kube-apiserver.rules
+    rules:
+    - expr: histogram_quantile(0.99, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.99'
+      record: cluster_quantile:apiserver_request_latencies:histogram_quantile
+    - expr: histogram_quantile(0.9, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.9'
+      record: cluster_quantile:apiserver_request_latencies:histogram_quantile
+    - expr: histogram_quantile(0.5, sum(rate(apiserver_request_latencies_bucket{job="apiserver"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.5'
+      record: cluster_quantile:apiserver_request_latencies:histogram_quantile
+{{- end }}
\ No newline at end of file
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml
new file mode 100644
index 0000000000..a8d5400cb4
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-alerting.rules.yaml
@@ -0,0 +1,47 @@
+{{- /*
+Generated from 'kube-prometheus-node-alerting.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeAlerting }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-alerting.rules" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: kube-prometheus-node-alerting.rules
+    rules:
+    - alert: NodeDiskRunningFull
+      annotations:
+        message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 24 hours.
+      expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)'
+      for: 30m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: NodeDiskRunningFull
+      annotations:
+        message: Device {{`{{`}} $labels.device {{`}}`}} of node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} will be full within the next 2 hours.
+      expr: '(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)'
+      for: 10m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml
new file mode 100644
index 0000000000..87f072fd02
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-prometheus-node-recording.rules.yaml
@@ -0,0 +1,41 @@
+{{- /*
+Generated from 'kube-prometheus-node-recording.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: kube-prometheus-node-recording.rules
+    rules:
+    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
+      record: instance:node_cpu:rate:sum
+    - expr: sum((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"})) BY (instance)
+      record: instance:node_filesystem_usage:sum
+    - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
+      record: instance:node_network_receive_bytes:rate:sum
+    - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
+      record: instance:node_network_transmit_bytes:rate:sum
+    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
+      record: instance:node_cpu:ratio
+    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
+      record: cluster:node_cpu:sum_rate5m
+    - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
+      record: cluster:node_cpu:ratio
+{{- end }}
\ No newline at end of file
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-scheduler.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-scheduler.rules.yaml
new file mode 100644
index 0000000000..46c8d1d4a9
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kube-scheduler.rules.yaml
@@ -0,0 +1,63 @@
+{{- /*
+Generated from 'kube-scheduler.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: kube-scheduler.rules
+    rules:
+    - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.99'
+      record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
+    - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.99'
+      record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
+    - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.99'
+      record: cluster_quantile:scheduler_binding_latency:histogram_quantile
+    - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.9'
+      record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
+    - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.9'
+      record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
+    - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.9'
+      record: cluster_quantile:scheduler_binding_latency:histogram_quantile
+    - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.5'
+      record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile
+    - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.5'
+      record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile
+    - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06
+      labels:
+        quantile: '0.5'
+      record: cluster_quantile:scheduler_binding_latency:histogram_quantile
+{{- end }}
\ No newline at end of file
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-absent.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-absent.yaml
new file mode 100644
index 0000000000..5c1ebce9ea
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-absent.yaml
@@ -0,0 +1,159 @@
+{{- /*
+Generated from 'kubernetes-absent' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesAbsent }}
+{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }}
+{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }}
+{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }}
+{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-absent" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: kubernetes-absent
+    rules:
+{{- if .Values.alertmanager.enabled }}
+    - alert: AlertmanagerDown
+      annotations:
+        message: Alertmanager has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-alertmanagerdown
+      expr: absent(up{job="{{ $alertmanagerJob }}",namespace="{{ $namespace }}"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+{{- if .Values.kubeDns.enabled }}
+    - alert: CoreDNSDown
+      annotations:
+        message: CoreDNS has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-corednsdown
+      expr: absent(up{job="kube-dns"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+{{- if .Values.kubeApiServer.enabled }}
+    - alert: KubeAPIDown
+      annotations:
+        message: KubeAPI has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapidown
+      expr: absent(up{job="apiserver"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+{{- if .Values.kubeControllerManager.enabled }}
+    - alert: KubeControllerManagerDown
+      annotations:
+        message: KubeControllerManager has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown
+      expr: absent(up{job="kube-controller-manager"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+{{- if .Values.kubeScheduler.enabled }}
+    - alert: KubeSchedulerDown
+      annotations:
+        message: KubeScheduler has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown
+      expr: absent(up{job="kube-scheduler"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+{{- if .Values.kubeStateMetrics.enabled }}
+    - alert: KubeStateMetricsDown
+      annotations:
+        message: KubeStateMetrics has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatemetricsdown
+      expr: absent(up{job="kube-state-metrics"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+{{- if .Values.prometheusOperator.kubeletService.enabled }}
+    - alert: KubeletDown
+      annotations:
+        message: Kubelet has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown
+      expr: absent(up{job="kubelet"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+{{- if .Values.nodeExporter.enabled }}
+    - alert: NodeExporterDown
+      annotations:
+        message: NodeExporter has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-nodeexporterdown
+      expr: absent(up{job="node-exporter"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+    - alert: PrometheusDown
+      annotations:
+        message: Prometheus has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusdown
+      expr: absent(up{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- if .Values.prometheusOperator.enabled }}
+    - alert: PrometheusOperatorDown
+      annotations:
+        message: PrometheusOperator has disappeared from Prometheus target discovery.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-prometheusoperatordown
+      expr: absent(up{job="{{ $operatorJob }}",namespace="{{ $namespace }}"} == 1)
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
+{{- end }}
\ No newline at end of file
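Each absence alert above is wrapped in the matching component toggle, so clusters where a component cannot be scraped (for example managed control planes that hide the scheduler and controller manager) can opt out. A sketch using only the value paths referenced in the template:

    kubeScheduler:
      enabled: false          # skips KubeSchedulerDown
    kubeControllerManager:
      enabled: false          # skips KubeControllerManagerDown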
diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-apps.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-apps.yaml
new file mode 100644
index 0000000000..e7a41ca2ab
--- /dev/null
+++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-apps.yaml
@@ -0,0 +1,200 @@
+{{- /*
+Generated from 'kubernetes-apps' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml
+Do not change in-place! In order to change this file first read following link:
+https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack
+*/ -}}
+{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }}
+{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }}
+  namespace: {{ template "kube-prometheus-stack.namespace" . }}
+  labels:
+    app: {{ template "kube-prometheus-stack.name" . }}
+{{ include "kube-prometheus-stack.labels" . | indent 4 }}
+{{- if .Values.defaultRules.labels }}
+{{ toYaml .Values.defaultRules.labels | indent 4 }}
+{{- end }}
+{{- if .Values.defaultRules.annotations }}
+  annotations:
+{{ toYaml .Values.defaultRules.annotations | indent 4 }}
+{{- end }}
+spec:
+  groups:
+  - name: kubernetes-apps
+    rules:
+    - alert: KubePodCrashLooping
+      annotations:
+        message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is restarting {{`{{`}} printf "%.2f" $value {{`}}`}} times / 5 minutes.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodcrashlooping
+      expr: rate(kube_pod_container_status_restarts_total{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}[15m]) * 60 * 5 > 0
+      for: 1h
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubePodNotReady
+      annotations:
+        message: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than an hour.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepodnotready
+      expr: sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}", phase=~"Pending|Unknown"}) > 0
+      for: 1h
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeDeploymentGenerationMismatch
+      annotations:
+        message: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentgenerationmismatch
+      expr: |-
+        kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          !=
+        kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeDeploymentReplicasMismatch
+      annotations:
+        message: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than an hour.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedeploymentreplicasmismatch
+      expr: |-
+        kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          !=
+        kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+      for: 1h
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeStatefulSetReplicasMismatch
+      annotations:
+        message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetreplicasmismatch
+      expr: |-
+        kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          !=
+        kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeStatefulSetGenerationMismatch
+      annotations:
+        message: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetgenerationmismatch
+      expr: |-
+        kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          !=
+        kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeStatefulSetUpdateNotRolledOut
+      annotations:
+        message: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubestatefulsetupdatenotrolledout
+      expr: |-
+        max without (revision) (
+          kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+            unless
+          kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+        )
+          *
+        (
+          kube_statefulset_replicas{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+            !=
+          kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+        )
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeDaemonSetRolloutStuck
+      annotations:
+        message: Only {{`{{`}} $value {{`}}`}}% of the desired Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are scheduled and ready.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetrolloutstuck
+      expr: |-
+        kube_daemonset_status_number_ready{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          /
+        kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} * 100 < 100
+      for: 15m
+      labels:
+        severity: critical
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeDaemonSetNotScheduled
+      annotations:
+        message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled.'
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetnotscheduled
+      expr: |-
+        kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"}
+          -
+        kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeDaemonSetMisScheduled
+      annotations:
+        message: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run.'
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubedaemonsetmisscheduled
+      expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+      for: 10m
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeCronJobRunning
+      annotations:
+        message: CronJob {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.cronjob {{`}}`}} is taking more than 1h to complete.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecronjobrunning
+      expr: time() - kube_cronjob_next_schedule_time{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 3600
+      for: 1h
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeJobCompletion
+      annotations:
+        message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than one hour to complete.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobcompletion
+      expr: kube_job_spec_completions{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} - kube_job_status_succeeded{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+      for: 1h
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+    - alert: KubeJobFailed
+      annotations:
+        message: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete.
+        runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubejobfailed
+      expr: kube_job_status_failed{job="kube-state-metrics", namespace=~"{{ $targetNamespace }}"} > 0
+      for: 1h
+      labels:
+        severity: warning
+{{- if .Values.defaultRules.additionalRuleLabels }}
+{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }}
+{{- end }}
+{{- end }}
\ No newline at end of file
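Every expression in the group above is scoped with namespace=~"{{ $targetNamespace }}", taken from .Values.defaultRules.appNamespacesTarget (in upstream kube-prometheus-stack this defaults to ".*", i.e. all namespaces). A sketch with a hypothetical regex narrowing the app alerts:

    defaultRules:
      appNamespacesTarget: "default|team-.*"  # hypothetical value; app alerts then cover only these namespaces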
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit + expr: |- + sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum) + / + sum(node_memory_MemTotal_bytes) + > + (count(node:node_num_cpu:sum)-1) + / + count(node:node_num_cpu:sum) + for: 5m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeCPUOvercommit + annotations: + message: Cluster has overcommitted CPU resource requests for Namespaces. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecpuovercommit + expr: |- + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"}) + / + sum(node:node_num_cpu:sum) + > 1.5 + for: 5m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeMemOvercommit + annotations: + message: Cluster has overcommitted memory resource requests for Namespaces. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubememovercommit + expr: |- + sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"}) + / + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + > 1.5 + for: 5m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeQuotaExceeded + annotations: + message: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} printf "%0.0f" $value {{`}}`}}% of its {{`{{`}} $labels.resource {{`}}`}} quota. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubequotaexceeded + expr: |- + 100 * kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 90 + for: 15m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: CPUThrottlingHigh + annotations: + message: '{{`{{`}} printf "%0.0f" $value {{`}}`}}% throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container_name {{`}}`}} in pod {{`{{`}} $labels.pod_name {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-cputhrottlinghigh + expr: |- + 100 * sum(increase(container_cpu_cfs_throttled_periods_total{container_name!="", }[5m])) by (container_name, pod_name, namespace) + / + sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container_name, pod_name, namespace) + > 25 + for: 15m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-storage.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-storage.yaml new file mode 100644 index 0000000000..6469fffc52 --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-storage.yaml @@ -0,0 +1,72 @@ +{{- /* +Generated from 'kubernetes-storage' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml +Do not change in-place! 
In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }} +{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-storage + rules: + - alert: KubePersistentVolumeUsageCritical + annotations: + message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} printf "%0.2f" $value {{`}}`}}% free. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeusagecritical + expr: |- + 100 * kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} + < 3 + for: 1m + labels: + severity: critical +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubePersistentVolumeFullInFourDays + annotations: + message: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} printf "%0.2f" $value {{`}}`}}% is available. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefullinfourdays + expr: |- + 100 * ( + kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} + / + kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} + ) < 15 + and + predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0 + for: 5m + labels: + severity: critical +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubePersistentVolumeErrors + annotations: + message: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeerrors + expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0 + for: 5m + labels: + severity: critical +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-system.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-system.yaml new file mode 100644 index 0000000000..da232057be --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/kubernetes-system.yaml @@ -0,0 +1,184 @@ +{{- /* +Generated from 'kubernetes-system' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system + rules: + - alert: KubeNodeNotReady + annotations: + message: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than an hour.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubenodenotready + expr: kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0 + for: 1h + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeVersionMismatch + annotations: + message: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeversionmismatch + expr: count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1 + for: 1h + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeClientErrors + annotations: + message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}}% errors.' 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors + expr: |- + (sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job) + / + sum(rate(rest_client_requests_total[5m])) by (instance, job)) + * 100 > 1 + for: 15m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeClientErrors + annotations: + message: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} printf "%0.0f" $value {{`}}`}} errors / second. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclienterrors + expr: sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1 + for: 15m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeletTooManyPods + annotations: + message: Kubelet {{`{{`}} $labels.instance {{`}}`}} is running {{`{{`}} $value {{`}}`}} Pods, close to the limit of 110. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods + expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9 + for: 15m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeAPILatencyHigh + annotations: + message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh + expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeAPILatencyHigh + annotations: + message: The API server has a 99th percentile latency of {{`{{`}} $value {{`}}`}} seconds for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapilatencyhigh + expr: cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4 + for: 10m + labels: + severity: critical +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh + expr: |- + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3 + for: 10m + labels: + severity: critical +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh + expr: |- + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh + expr: |- + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10 + for: 10m + labels: + severity: critical +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeAPIErrorsHigh + annotations: + message: API server is returning errors for {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.verb {{`}}`}} {{`{{`}} $labels.resource {{`}}`}} {{`{{`}} $labels.subresource {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeapierrorshigh + expr: |- + sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb) + / + sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeClientCertificateExpiration + annotations: + message: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days. + runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration + expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: KubeClientCertificateExpiration + annotations: + message: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours. 
+ runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeclientcertificateexpiration + expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 + labels: + severity: critical +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-network.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-network.yaml new file mode 100644 index 0000000000..c75f1ae074 --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-network.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'node-network' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node-network + rules: + - alert: NetworkReceiveErrors + annotations: + message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing receive errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}" + expr: rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0 + for: 2m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: NetworkTransmitErrors + annotations: + message: Network interface "{{`{{`}} $labels.device {{`}}`}}" showing transmit errors on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}" + expr: rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0 + for: 2m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: NodeNetworkInterfaceFlapping + annotations: + message: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing it's up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}" + expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: 2m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-time.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-time.yaml new file mode 100644 index 0000000000..b7a2fc92fd --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node-time.yaml @@ -0,0 +1,37 @@ +{{- /* +Generated from 'node-time' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.time }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-time" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node-time + rules: + - alert: ClockSkewDetected + annotations: + message: Clock skew detected on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}}. Ensure NTP is configured correctly on this host. + expr: abs(node_timex_offset_seconds{job="node-exporter"}) > 0.03 + for: 2m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node.rules.yaml new file mode 100644 index 0000000000..2bc7af3a97 --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/node.rules.yaml @@ -0,0 +1,202 @@ +{{- /* +Generated from 'node.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.node }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node.rules + rules: + - expr: sum(min(kube_pod_info) by (node)) + record: ':kube_pod_info_node_count:' + - expr: max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod) + record: 'node_namespace_pod:kube_pod_info:' + - expr: |- + count by (node) (sum by (node, cpu) ( + node_cpu_seconds_total{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + )) + record: node:node_num_cpu:sum + - expr: 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])) + record: :node_cpu_utilisation:avg1m + - expr: |- + 1 - avg by (node) ( + rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info:) + record: node:node_cpu_utilisation:avg1m + - expr: |- + node:node_cpu_utilisation:avg1m + * + node:node_num_cpu:sum + / + scalar(sum(node:node_num_cpu:sum)) + record: node:cluster_cpu_utilisation:ratio + - expr: |- + sum(node_load1{job="node-exporter"}) + / + sum(node:node_num_cpu:sum) + record: ':node_cpu_saturation_load1:' + - expr: |- + sum by (node) ( + node_load1{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + / + node:node_num_cpu:sum + record: 'node:node_cpu_saturation_load1:' + - expr: |- + 1 - + sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + / + sum(node_memory_MemTotal_bytes{job="node-exporter"}) + record: ':node_memory_utilisation:' + - expr: sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + record: :node_memory_MemFreeCachedBuffers_bytes:sum + - expr: sum(node_memory_MemTotal_bytes{job="node-exporter"}) + record: :node_memory_MemTotal_bytes:sum + - expr: |- + sum by (node) ( + (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_bytes_available:sum + - expr: |- + sum by (node) ( + node_memory_MemTotal_bytes{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_bytes_total:sum + - expr: |- + (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) + / + node:node_memory_bytes_total:sum + record: node:node_memory_utilisation:ratio + - expr: |- + (node:node_memory_bytes_total:sum - node:node_memory_bytes_available:sum) + / + scalar(sum(node:node_memory_bytes_total:sum)) + record: node:cluster_memory_utilisation:ratio + - expr: |- + 1e3 * sum( + (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) + + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) + ) + record: :node_memory_swap_io_bytes:sum_rate + - expr: |- + 1 - + sum by (node) ( + (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"}) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + / + sum by (node) ( + 
node_memory_MemTotal_bytes{job="node-exporter"} + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: 'node:node_memory_utilisation:' + - expr: 1 - (node:node_memory_bytes_available:sum / node:node_memory_bytes_total:sum) + record: 'node:node_memory_utilisation_2:' + - expr: |- + 1e3 * sum by (node) ( + (rate(node_vmstat_pgpgin{job="node-exporter"}[1m]) + + rate(node_vmstat_pgpgout{job="node-exporter"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_memory_swap_io_bytes:sum_rate + - expr: avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) + record: :node_disk_utilisation:avg_irate + - expr: |- + avg by (node) ( + irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_disk_utilisation:avg_irate + - expr: avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])) + record: :node_disk_saturation:avg_irate + - expr: |- + avg by (node) ( + irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_disk_saturation:avg_irate + - expr: |- + max by (instance, namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} + - node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + record: 'node:node_filesystem_usage:' + - expr: max by (instance, namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}) + record: 'node:node_filesystem_avail:' + - expr: |- + sum(irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + + sum(irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + record: :node_net_utilisation:sum_irate + - expr: |- + sum by (node) ( + (irate(node_network_receive_bytes_total{job="node-exporter",device!~"veth.+"}[1m]) + + irate(node_network_transmit_bytes_total{job="node-exporter",device!~"veth.+"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_net_utilisation:sum_irate + - expr: |- + sum(irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + + sum(irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + record: :node_net_saturation:sum_irate + - expr: |- + sum by (node) ( + (irate(node_network_receive_drop_total{job="node-exporter",device!~"veth.+"}[1m]) + + irate(node_network_transmit_drop_total{job="node-exporter",device!~"veth.+"}[1m])) + * on (namespace, pod) group_left(node) + node_namespace_pod:kube_pod_info: + ) + record: node:node_net_saturation:sum_irate + - expr: |- + max( + max( + kube_pod_info{job="kube-state-metrics", host_ip!=""} + ) by (node, host_ip) + * on (host_ip) group_right (node) + label_replace( + (max(node_filesystem_files{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" + ) + ) by (node) + record: 'node:node_inodes_total:' + - expr: |- + max( + max( + kube_pod_info{job="kube-state-metrics", host_ip!=""} + ) by (node, host_ip) + * 
on (host_ip) group_right (node) + label_replace( + (max(node_filesystem_files_free{job="node-exporter", mountpoint="/"}) by (instance)), "host_ip", "$1", "instance", "(.*):.*" + ) + ) by (node) + record: 'node:node_inodes_free:' +{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus-operator.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus-operator.yaml new file mode 100644 index 0000000000..a8a8915b62 --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus-operator.yaml @@ -0,0 +1,49 @@ +{{- /* +Generated from 'prometheus-operator' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }} +{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }} +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: prometheus-operator + rules: + - alert: PrometheusOperatorReconcileErrors + annotations: + message: Errors while reconciling {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} Namespace. + expr: rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusOperatorNodeLookupErrors + annotations: + message: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace. 
+ expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus.rules.yaml b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus.rules.yaml new file mode 100644 index 0000000000..0480c83b56 --- /dev/null +++ b/packages/rancher-monitoring/generated-changes/exclude/templates/prometheus/rules/prometheus.rules.yaml @@ -0,0 +1,139 @@ +{{- /* +Generated from 'prometheus.rules' group from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/release-0.1/manifests/prometheus-rules.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }} +{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }} +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . 
| indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: prometheus.rules + rules: + - alert: PrometheusConfigReloadFailed + annotations: + description: Reloading Prometheus' configuration has failed for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} + summary: Reloading Prometheus' configuration failed + expr: prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} == 0 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusNotificationQueueRunningFull + annotations: + description: Prometheus' alert notification queue is running full for {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} + summary: Prometheus' alert notification queue is running full + expr: predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) > prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusErrorSendingAlerts + annotations: + description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}} + summary: Errors while sending alert from Prometheus + expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.01 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusErrorSendingAlerts + annotations: + description: Errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.Alertmanager{{`}}`}} + summary: Errors while sending alerts from Prometheus + expr: rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.03 + for: 10m + labels: + severity: critical +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusNotConnectedToAlertmanagers + annotations: + description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} is not connected to any Alertmanagers + summary: Prometheus is not connected to any Alertmanagers + expr: prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} < 1 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusTSDBReloadsFailing + annotations: + description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value 
| humanize{{`}}`}} reload failures over the last four hours.' + summary: Prometheus has issues reloading data blocks from disk + expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0 + for: 12h + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusTSDBCompactionsFailing + annotations: + description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} had {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last four hours.' + summary: Prometheus has issues compacting sample blocks + expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[2h]) > 0 + for: 12h + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusTSDBWALCorruptions + annotations: + description: '{{`{{`}}$labels.job{{`}}`}} at {{`{{`}}$labels.instance{{`}}`}} has a corrupted write-ahead log (WAL).' + summary: Prometheus write-ahead log is corrupted + expr: prometheus_tsdb_wal_corruptions_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"} > 0 + for: 4h + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusNotIngestingSamples + annotations: + description: Prometheus {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} isn't ingesting samples. + summary: Prometheus isn't ingesting samples + expr: rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) <= 0 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} + - alert: PrometheusTargetScrapesDuplicate + annotations: + description: '{{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has many samples rejected due to duplicate timestamps but different values' + summary: Prometheus has many samples rejected + expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: 10m + labels: + severity: warning +{{- if .Values.defaultRules.additionalRuleLabels }} +{{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/overlay/files/ingress-nginx/nginx.json b/packages/rancher-monitoring/generated-changes/overlay/files/ingress-nginx/nginx.json index 347c9eb05c..d4793ac678 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/files/ingress-nginx/nginx.json +++ b/packages/rancher-monitoring/generated-changes/overlay/files/ingress-nginx/nginx.json @@ -1415,7 +1415,7 @@ "multi": false, "name": "ingress", "options": [], - "query": "label_values(nginx_ingress_controller_requests{namespace=~\"$namespace\",controller_class=~\"$controller_class\",controller=~\"$controller\"}, ingress) ", + "query": "label_values(nginx_ingress_controller_requests{namespace=~\"$namespace\",controller_class=~\"$controller_class\",controller_pod=~\"$controller\"}, ingress) ", "refresh": 1, "regex": "", "sort": 2, @@ -1460,4 +1460,4 @@ "title": "NGINX / Ingress Controller", 
"uid": "nginx", "version": 1 -} +} \ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/overlay/files/ingress-nginx/request-handling-performance.json b/packages/rancher-monitoring/generated-changes/overlay/files/ingress-nginx/request-handling-performance.json index 5635ae9764..d0125f0ac9 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/files/ingress-nginx/request-handling-performance.json +++ b/packages/rancher-monitoring/generated-changes/overlay/files/ingress-nginx/request-handling-performance.json @@ -481,7 +481,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress = \"$ingress\",\n status =~ \"[4-5].*\"\n}[1m])) / sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress = \"$ingress\",\n}[1m]))", + "expr": "sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n status =~ \"[4-5].*\"\n}[1m])) / sum by (path) (rate(nginx_ingress_controller_request_duration_seconds_count{\n ingress =~ \"$ingress\",\n}[1m]))", "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", @@ -573,7 +573,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (path) (rate(nginx_ingress_controller_response_duration_seconds_sum{ingress = \"$ingress\"}[1m]))", + "expr": "sum by (path) (rate(nginx_ingress_controller_response_duration_seconds_sum{ingress =~ \"$ingress\"}[1m]))", "interval": "", "intervalFactor": 1, "legendFormat": "{{ path }}", @@ -764,7 +764,7 @@ "refId": "D" }, { - "expr": " sum (rate(nginx_ingress_controller_response_size_bucket{\n namespace =~ \"$namespace\",\n ingress =~ \"$ingress\",\n }[1m])) by (le)\n", + "expr": " sum (rate(nginx_ingress_controller_response_size_bucket{\n ingress =~ \"$ingress\",\n }[1m])) by (le)\n", "hide": true, "legendFormat": "{{le}}", "refId": "A" @@ -978,4 +978,4 @@ "title": "NGINX / Request Handling Performance", "uid": "4GFbkOsZk", "version": 1 - } +} diff --git a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/cluster/rancher-cluster-nodes.json b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/cluster/rancher-cluster-nodes.json index b8c1ab7e6f..b33895a052 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/cluster/rancher-cluster-nodes.json +++ b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/cluster/rancher-cluster-nodes.json @@ -565,25 +565,25 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_errors{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "expr": "sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", "interval": "", "legendFormat": "Receive Errors ({{instance}})", "refId": "A" }, { - "expr": "sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "expr": 
"sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", "interval": "", "legendFormat": "Receive Total ({{instance}})", "refId": "B" }, { - "expr": "sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_outbound_errors{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "expr": "sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_outbound_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", "interval": "", "legendFormat": "Transmit Errors ({{instance}})", "refId": "C" }, { - "expr": "sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_discarded{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "expr": "sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) by (instance) OR sum(rate(windows_net_packets_received_discarded_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", "interval": "", "legendFormat": "Receive Dropped ({{instance}})", "refId": "D" @@ -696,7 +696,7 @@ "refId": "A" }, { - "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval]) OR rate(windows_net_packets_received_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval]) OR rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) by (instance)", "interval": "", "legendFormat": "Receive Total ({{instance}})", "refId": "B" diff --git a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/cluster/rancher-cluster.json b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/cluster/rancher-cluster.json index 29cc916757..8fccbc24c1 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/cluster/rancher-cluster.json +++ b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/cluster/rancher-cluster.json @@ -551,25 +551,25 @@ "steppedLine": false, "targets": [ { - "expr": "(sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_errors{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "expr": "(sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", "interval": "", "legendFormat": "Receive Errors", "refId": "A" }, { - "expr": 
"(sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "expr": "(sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", "interval": "", "legendFormat": "Receive Total", "refId": "B" }, { - "expr": "(sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_outbound_errors{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "expr": "(sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_outbound_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", "interval": "", "legendFormat": "Transmit Errors", "refId": "C" }, { - "expr": "(sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_discarded{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", + "expr": "(sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_discarded_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval])) OR on() vector(0))", "interval": "", "legendFormat": "Receive Dropped", "refId": "D" @@ -679,7 +679,7 @@ "refId": "A" }, { - "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval]) OR rate(windows_net_packets_received_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval]))", + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\"}[$__rate_interval]) OR rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*'}[$__rate_interval]))", "interval": "", "legendFormat": "Receive Total", "refId": "B" diff --git a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/home/rancher-default-home.json b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/home/rancher-default-home.json index 7923d69697..13b153cf8d 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/home/rancher-default-home.json +++ b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/home/rancher-default-home.json @@ -470,7 +470,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kube_node_status_allocatable_cpu_cores{})", + "expr": "sum(kube_node_status_allocatable_cpu_cores{}) OR sum(kube_node_status_allocatable{resource=\"cpu\",unit=\"core\"})", "interval": "10s", "intervalFactor": 1, "refId": "A", @@ -654,7 +654,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum (kube_node_status_allocatable_memory_bytes{})", + "expr": "sum(kube_node_status_allocatable_memory_bytes{}) OR sum(kube_node_status_allocatable{resource=\"memory\", unit=\"byte\"})", 
"interval": "10s", "intervalFactor": 1, "refId": "A", diff --git a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/nodes/rancher-node-detail.json b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/nodes/rancher-node-detail.json index 0b57efa2ec..d71bc02b78 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/nodes/rancher-node-detail.json +++ b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/nodes/rancher-node-detail.json @@ -561,25 +561,25 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_errors{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "expr": "sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", "interval": "", "legendFormat": "Receive Errors ({{device}})", "refId": "A" }, { - "expr": "sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "expr": "sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", "interval": "", "legendFormat": "Receive Total ({{device}})", "refId": "B" }, { - "expr": "sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_outbound_errors{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "expr": "sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_outbound_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", "interval": "", "legendFormat": "Transmit Errors ({{device}})", "refId": "C" }, { - "expr": "sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_discarded{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "expr": "sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) by (device) OR sum(rate(windows_net_packets_received_discarded_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", "interval": "", "legendFormat": "Receive Dropped ({{device}})", "refId": "D" @@ -692,7 +692,7 @@ "refId": "A" }, { - 
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_net_packets_received_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) by (device)", "interval": "", "legendFormat": "Receive Total ({{device}})", "refId": "B" diff --git a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/nodes/rancher-node.json b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/nodes/rancher-node.json index 7324c4164b..c4b77db64c 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/files/rancher/nodes/rancher-node.json +++ b/packages/rancher-monitoring/generated-changes/overlay/files/rancher/nodes/rancher-node.json @@ -551,25 +551,25 @@ "steppedLine": false, "targets": [ { - "expr": "(sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_errors{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "expr": "(sum(rate(node_network_receive_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", "interval": "", "legendFormat": "Receive Errors", "refId": "A" }, { - "expr": "(sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "expr": "(sum(rate(node_network_receive_packets_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", "interval": "", "legendFormat": "Receive Total", "refId": "B" }, { - "expr": "(sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_outbound_errors{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "expr": "(sum(rate(node_network_transmit_errs_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_outbound_errors_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", "interval": "", "legendFormat": "Transmit Errors", "refId": "C" }, { - "expr": "(sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", 
instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_discarded{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", + "expr": "(sum(rate(node_network_receive_drop_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0)) + (sum(rate(windows_net_packets_received_discarded_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval])) OR on() vector(0))", "interval": "", "legendFormat": "Receive Dropped", "refId": "D" @@ -679,7 +679,7 @@ "refId": "A" }, { - "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_net_packets_received_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|veth.*|docker.*|flannel.*|cali.*|cbr.*\", instance=~\"$instance\"}[$__rate_interval]) OR rate(windows_net_packets_received_total_total{nic!~'.*isatap.*|.*VPN.*|.*Pseudo.*|.*tunneling.*', instance=~\"$instance\"}[$__rate_interval]))", "interval": "", "legendFormat": "Receive Total", "refId": "B" diff --git a/packages/rancher-monitoring/generated-changes/overlay/templates/rancher-monitoring/exporters/ingress-nginx/servicemonitor.yaml b/packages/rancher-monitoring/generated-changes/overlay/templates/rancher-monitoring/exporters/ingress-nginx/servicemonitor.yaml index a42f46e5c8..0cbc07f697 100644 --- a/packages/rancher-monitoring/generated-changes/overlay/templates/rancher-monitoring/exporters/ingress-nginx/servicemonitor.yaml +++ b/packages/rancher-monitoring/generated-changes/overlay/templates/rancher-monitoring/exporters/ingress-nginx/servicemonitor.yaml @@ -24,6 +24,9 @@ spec: {{- if .Values.ingressNginx.serviceMonitor.interval}} interval: {{ .Values.ingressNginx.serviceMonitor.interval }} {{- end }} + {{- if .Values.ingressNginx.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.ingressNginx.serviceMonitor.proxyUrl}} + {{- end }} bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token {{- if .Values.ingressNginx.serviceMonitor.metricRelabelings }} metricRelabelings: diff --git a/packages/rancher-monitoring/generated-changes/patch/Chart.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/Chart.yaml.patch index 7611b6c7dc..9f92e82a2a 100644 --- a/packages/rancher-monitoring/generated-changes/patch/Chart.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/Chart.yaml.patch @@ -1,9 +1,47 @@ --- charts-original/Chart.yaml +++ charts/Chart.yaml -@@ -5,6 +5,15 @@ +@@ -1,3 +1,35 @@ ++apiVersion: v2 ++description: Collects several related Helm charts, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator. 
++icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png ++engine: gotpl ++type: application ++maintainers: ++ - name: vsliouniaev ++ - name: bismarck ++ - name: gianrubio ++ email: gianrubio@gmail.com ++ - name: gkarthiks ++ email: github.gkarthiks@gmail.com ++ - name: scottrigby ++ email: scott@r6by.com ++ - name: Xtigyro ++ email: miroslav.hadzhiev@gmail.com ++ - name: Arvind ++ email: arvind.iyengar@suse.com ++ url: "" ++name: rancher-monitoring ++sources: ++ - https://github.com/prometheus-community/helm-charts ++ - https://github.com/prometheus-operator/kube-prometheus ++version: 16.6.0 ++appVersion: 0.48.0 ++kubeVersion: ">=1.16.0-0" ++home: https://github.com/prometheus-operator/kube-prometheus ++keywords: ++- operator ++- prometheus ++- kube-prometheus ++- monitoring + annotations: + artifacthub.io/links: | + - name: Chart Source +@@ -5,8 +37,15 @@ - name: Upstream Project url: https://github.com/prometheus-operator/kube-prometheus artifacthub.io/operator: "true" +-apiVersion: v2 +-appVersion: 0.48.0 + catalog.cattle.io/certified: rancher + catalog.cattle.io/namespace: cattle-monitoring-system + catalog.cattle.io/release-name: rancher-monitoring @@ -13,10 +51,10 @@ + catalog.cattle.io/auto-install: rancher-monitoring-crd=match + catalog.cattle.io/requests-cpu: "4500m" + catalog.cattle.io/requests-memory: "4000Mi" - apiVersion: v2 - appVersion: 0.46.0 dependencies: -@@ -71,19 +80,17 @@ + - condition: grafana.enabled + name: grafana +@@ -71,34 +110,6 @@ - condition: rkeScheduler.enabled name: rkeScheduler repository: file://./charts/rkeScheduler @@ -28,29 +66,27 @@ - and Prometheus rules combined with documentation and scripts to provide easy to - operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus - Operator. -+description: Collects several related Helm charts, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator. 
- home: https://github.com/prometheus-operator/kube-prometheus - icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png - keywords: - - operator - - prometheus - - kube-prometheus -+- monitoring - kubeVersion: '>=1.16.0-0' - maintainers: - - name: vsliouniaev -@@ -96,9 +103,12 @@ - name: scottrigby - - email: miroslav.hadzhiev@gmail.com - name: Xtigyro +-home: https://github.com/prometheus-operator/kube-prometheus +-icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png +-keywords: +-- operator +-- prometheus +-- kube-prometheus +-kubeVersion: '>=1.16.0-0' +-maintainers: +-- name: vsliouniaev +-- name: bismarck +-- email: gianrubio@gmail.com +- name: gianrubio +-- email: github.gkarthiks@gmail.com +- name: gkarthiks +-- email: scott@r6by.com +- name: scottrigby +-- email: miroslav.hadzhiev@gmail.com +- name: Xtigyro -name: kube-prometheus-stack -+- name: Arvind -+ email: arvind.iyengar@suse.com -+ url: "" -+name: rancher-monitoring - sources: - - https://github.com/prometheus-community/helm-charts - - https://github.com/prometheus-operator/kube-prometheus - type: application --version: 14.5.0 -+version: 14.5.1 +-sources: +-- https://github.com/prometheus-community/helm-charts +-- https://github.com/prometheus-operator/kube-prometheus +-type: application +-version: 16.6.0 diff --git a/packages/rancher-monitoring/generated-changes/patch/README.md.patch b/packages/rancher-monitoring/generated-changes/patch/README.md.patch index 67d72c1e8c..aa173fe845 100644 --- a/packages/rancher-monitoring/generated-changes/patch/README.md.patch +++ b/packages/rancher-monitoring/generated-changes/patch/README.md.patch @@ -1,6 +1,6 @@ --- charts-original/README.md +++ charts/README.md -@@ -171,7 +171,43 @@ +@@ -193,7 +193,43 @@ helm show values prometheus-community/kube-prometheus-stack ``` diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/_helpers.tpl.patch b/packages/rancher-monitoring/generated-changes/patch/templates/_helpers.tpl.patch index cdf02762b4..3629a55191 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/_helpers.tpl.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/_helpers.tpl.patch @@ -131,3 +131,12 @@ {{/* vim: set filetype=mustache: */}} {{/* Expand the name of the chart. This is suffixed with -alertmanager, which means subtract 13 from longest 63 available */}} {{- define "kube-prometheus-stack.name" -}} +@@ -48,7 +175,7 @@ + {{- define "kube-prometheus-stack.labels" }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/instance: {{ .Release.Name }} +-app.kubernetes.io/version: "{{ .Chart.Version }}" ++app.kubernetes.io/version: "{{ replace "+" "_" .Chart.Version }}" + app.kubernetes.io/part-of: {{ template "kube-prometheus-stack.name" . }} + chart: {{ template "kube-prometheus-stack.chartref" . 
}} + release: {{ $.Release.Name | quote }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/alertmanager.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/alertmanager.yaml.patch index 89930fd990..98457ddbe3 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/alertmanager.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/alertmanager/alertmanager.yaml.patch @@ -1,7 +1,7 @@ --- charts-original/templates/alertmanager/alertmanager.yaml +++ charts/templates/alertmanager/alertmanager.yaml -@@ -9,7 +9,7 @@ - {{ include "kube-prometheus-stack.labels" . | indent 4 }} +@@ -13,7 +13,7 @@ + {{- end }} spec: {{- if .Values.alertmanager.alertmanagerSpec.image }} - image: {{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }} @@ -9,7 +9,7 @@ version: {{ .Values.alertmanager.alertmanagerSpec.image.tag }} {{- if .Values.alertmanager.alertmanagerSpec.image.sha }} sha: {{ .Values.alertmanager.alertmanagerSpec.image.sha }} -@@ -22,11 +22,13 @@ +@@ -26,11 +26,13 @@ externalUrl: "{{ tpl .Values.alertmanager.alertmanagerSpec.externalUrl . }}" {{- else if and .Values.alertmanager.ingress.enabled .Values.alertmanager.ingress.hosts }} externalUrl: "http://{{ tpl (index .Values.alertmanager.ingress.hosts 0) . }}{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}" @@ -24,7 +24,7 @@ {{ toYaml .Values.alertmanager.alertmanagerSpec.nodeSelector | indent 4 }} {{- end }} paused: {{ .Values.alertmanager.alertmanagerSpec.paused }} -@@ -100,8 +102,8 @@ +@@ -104,8 +106,8 @@ - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . }}-alertmanager]} {{- end }} {{- end }} @@ -34,7 +34,7 @@ {{ toYaml .Values.alertmanager.alertmanagerSpec.tolerations | indent 4 }} {{- end }} {{- if .Values.alertmanager.alertmanagerSpec.topologySpreadConstraints }} -@@ -136,10 +138,10 @@ +@@ -140,10 +142,10 @@ {{ toYaml .Values.alertmanager.alertmanagerSpec.volumeMounts | indent 4 }} {{- end }} portName: {{ .Values.alertmanager.alertmanagerSpec.portName }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/exporters/kubelet/servicemonitor.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/exporters/kubelet/servicemonitor.yaml.patch index 2040d6b834..29cd80d07a 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/exporters/kubelet/servicemonitor.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/exporters/kubelet/servicemonitor.yaml.patch @@ -15,7 +15,7 @@ labels: app: {{ template "kube-prometheus-stack.name" . }}-kubelet {{- include "kube-prometheus-stack.labels" . 
| indent 4 }} -@@ -76,7 +79,7 @@ +@@ -85,7 +88,7 @@ {{- if .Values.kubelet.serviceMonitor.resource }} - port: https-metrics scheme: https @@ -24,7 +24,7 @@ {{- if .Values.kubelet.serviceMonitor.interval }} interval: {{ .Values.kubelet.serviceMonitor.interval }} {{- end }} -@@ -125,7 +128,7 @@ +@@ -143,7 +146,7 @@ {{- end }} {{- if .Values.kubelet.serviceMonitor.resource }} - port: http-metrics diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/cluster-total.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/cluster-total.yaml.patch index c1de54860a..6355241d79 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/cluster-total.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/cluster-total.yaml.patch @@ -9,19 +9,3 @@ name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -1825,7 +1825,7 @@ - "datasource": "$datasource", - "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, - "includeAll": false, -- "label": null, -+ "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ -@@ -1879,4 +1879,4 @@ - "uid": "ff635a025bcfea7bc3dd4f508990a3e9", - "version": 0 - } --{{- end }} -\ No newline at end of file -+{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/controller-manager.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/controller-manager.yaml.patch index 3a8bbdd46c..cf6f2788a5 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/controller-manager.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/controller-manager.yaml.patch @@ -4,8 +4,8 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} --{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeControllerManager.enabled }} -+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} +-{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeControllerManager.enabled }} ++{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubeControllerManager.enabled" .)}} apiVersion: v1 kind: ConfigMap @@ -19,11 +19,11 @@ "tableColumn": "", "targets": [ { -- "expr": "sum(up{job=\"kube-controller-manager\"})", +- "expr": "sum(up{cluster=\"$cluster\", job=\"kube-controller-manager\"})", + {{- if .Values.k3sServer.enabled }} -+ "expr": "sum(up{job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\", metrics_path=\"/metrics\"})", ++ "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", metrics_path=\"/metrics\"})", + {{- else }} -+ "expr": "sum(up{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"})", ++ "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"})", + {{- end }} "format": "time_series", "intervalFactor": 2, @@ -32,8 +32,8 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(workqueue_adds_total{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", -+ "expr": "sum(rate(workqueue_adds_total{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)", +- "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", ++ "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", @@ -41,8 +41,8 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(workqueue_depth{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", -+ "expr": "sum(rate(workqueue_depth{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)", +- "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name)", ++ "expr": "sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", @@ -50,8 +50,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name, le))", +- "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[5m])) by (instance, name, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, name, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} {{`{{`}}name{{`}}`}}", @@ -92,8 +92,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", +- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", @@ -101,8 +101,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", +- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", @@ -110,8 +110,8 @@ "steppedLine": false, "targets": [ { -- "expr": "process_resident_memory_bytes{job=\"kube-controller-manager\",instance=~\"$instance\"}", -+ "expr": "process_resident_memory_bytes{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\",instance=~\"$instance\"}", +- "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}", ++ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -119,8 +119,8 @@ "steppedLine": false, "targets": [ { -- "expr": "rate(process_cpu_seconds_total{job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", -+ "expr": "rate(process_cpu_seconds_total{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\",instance=~\"$instance\"}[5m])", +- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[5m])", ++ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -128,21 +128,21 @@ "steppedLine": false, "targets": [ { -- "expr": "go_goroutines{job=\"kube-controller-manager\",instance=~\"$instance\"}", -+ "expr": "go_goroutines{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\",instance=~\"$instance\"}", +- "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}", ++ "expr": "go_goroutines{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . 
}}\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -1100,7 +1105,7 @@ +@@ -1126,7 +1131,7 @@ "options": [ ], -- "query": "label_values(process_cpu_seconds_total{job=\"kube-controller-manager\"}, instance)", -+ "query": "label_values(process_cpu_seconds_total{job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)", +- "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)", ++ "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeControllerManager.jobName" . }}\"}, instance)", "refresh": 2, "regex": "", "sort": 1, -@@ -1148,4 +1153,5 @@ +@@ -1174,4 +1179,5 @@ "uid": "72e0e05bef5099e5f049b05fdc429ed4", "version": 0 } diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/etcd.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/etcd.yaml.patch index bd54479326..2d2b35a18a 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/etcd.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/etcd.yaml.patch @@ -4,8 +4,8 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} --{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeEtcd.enabled }} -+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} +-{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeEtcd.enabled }} ++{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubeEtcd.enabled" .)}} apiVersion: v1 kind: ConfigMap @@ -16,7 +16,7 @@ annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} @@ -1113,4 +1114,5 @@ - "uid": "c2f4e12cdf69feb95caa41a5a1b423d9", + "title": "etcd", "version": 215 } +{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-coredns.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-coredns.yaml.patch index 4dc8980edf..35ee92b1b4 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-coredns.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/k8s-coredns.yaml.patch @@ -1,14 +1,11 @@ --- charts-original/templates/grafana/dashboards-1.14/k8s-coredns.yaml +++ charts/templates/grafana/dashboards-1.14/k8s-coredns.yaml -@@ -4,10 +4,8 @@ +@@ -4,7 +4,7 @@ apiVersion: v1 kind: ConfigMap metadata: - namespace: {{ template "kube-prometheus-stack.namespace" . 
}} + namespace: {{ .Values.grafana.defaultDashboards.namespace }} name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }} -- annotations: --{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} - labels: - {{- if $.Values.grafana.sidecar.dashboards.label }} - {{ $.Values.grafana.sidecar.dashboards.label }}: "1" + annotations: + {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/kubelet.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/kubelet.yaml.patch index fdb6169504..c3331e5ac0 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/kubelet.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/kubelet.yaml.patch @@ -4,8 +4,8 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} --{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubelet.enabled }} -+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} +-{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubelet.enabled }} ++{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubelet.enabled" .) }} apiVersion: v1 kind: ConfigMap @@ -19,8 +19,8 @@ "tableColumn": "", "targets": [ { -- "expr": "sum(up{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", -+ "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\"})", +- "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\"})", ++ "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "", diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-pod.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-pod.yaml.patch index 2ad27f1684..e2e2d07d08 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-pod.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-pod.yaml.patch @@ -9,19 +9,3 @@ name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -1295,7 +1295,7 @@ - "datasource": "$datasource", - "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, - "includeAll": false, -- "label": null, -+ "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ -@@ -1461,4 +1461,4 @@ - "uid": "8b7a8b326d7a6f1f04244066368c67af", - "version": 0 - } --{{- end }} -\ No newline at end of file -+{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-workload.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-workload.yaml.patch index 34b63f1265..35ebc6fcf7 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-workload.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/namespace-by-workload.yaml.patch @@ -9,19 +9,3 @@ name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -1535,7 +1535,7 @@ - "datasource": "$datasource", - "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, - "includeAll": false, -- "label": null, -+ "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ -@@ -1733,4 +1733,4 @@ - "uid": "bbb2a765a623ae38130206c7d94a160f", - "version": 0 - } --{{- end }} -\ No newline at end of file -+{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml.patch index b053365bfe..e7c77d5e03 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml.patch @@ -13,16 +13,16 @@ "steppedLine": false, "targets": [ { -- "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", -+ "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include 
"exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", +- "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", ++ "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used Space", "refId": "A" }, { -- "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", -+ "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", +- "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", ++ "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Free Space", @@ -30,8 +30,8 @@ "tableColumn": "", "targets": [ { -- "expr": "max without(instance,node) (\n(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", -+ "expr": "max without(instance,node) (\n(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", +- "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", ++ "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", @@ -39,16 +39,16 @@ "steppedLine": false, "targets": [ { -- "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", -+ "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", +- "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", ++ "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used inodes", "refId": "A" }, { -- "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", -+ "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", +- "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", ++ "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": " Free inodes", @@ -56,8 +56,8 @@ "tableColumn": "", "targets": [ { -- "expr": "max without(instance,node) (\nkubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", -+ "expr": "max without(instance,node) (\nkubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n", +- "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", ++ "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", "format": "time_series", "intervalFactor": 2, "legendFormat": "", diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/pod-total.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/pod-total.yaml.patch index 7e4ae7e9eb..96bbe57475 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/pod-total.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/pod-total.yaml.patch @@ -9,19 +9,3 @@ name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -1027,7 +1027,7 @@ - "datasource": "$datasource", - "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, - "includeAll": false, -- "label": null, -+ "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ -@@ -1225,4 +1225,4 @@ - "uid": "7a18067ce943a40ae25454675c19ff5c", - "version": 0 - } --{{- end }} -\ No newline at end of file -+{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/proxy.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/proxy.yaml.patch index d84a9e5e68..3d47af0fe8 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/proxy.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/proxy.yaml.patch @@ -4,8 +4,8 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} --{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeProxy.enabled }} -+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} +-{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeProxy.enabled }} ++{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubeProxy.enabled" .)}} apiVersion: v1 kind: ConfigMap @@ -19,11 +19,11 @@ "tableColumn": "", "targets": [ { -- "expr": "sum(up{job=\"kube-proxy\"})", +- "expr": "sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})", + {{- if .Values.k3sServer.enabled }} -+ "expr": "sum(up{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", metrics_path=\"/metrics\"})", ++ "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", metrics_path=\"/metrics\"})", + {{- else }} -+ "expr": "sum(up{job=\"{{ include "exporter.kubeProxy.jobName" . }}\"})", ++ "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\"})", + {{- end }} "format": "time_series", "intervalFactor": 2, @@ -32,8 +32,8 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", -+ "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\"}[5m]))", +- "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", ++ "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "rate", @@ -41,8 +41,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", -+ "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\"}[5m]))", +- "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", ++ "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -50,8 +50,8 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", -+ "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\"}[5m]))", +- "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m]))", ++ "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "rate", @@ -59,8 +59,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\", instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -68,32 +68,32 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"2..\"}[5m]))", +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { -- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"3..\"}[5m]))", +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"3..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { -- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"4..\"}[5m]))", +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { -- "expr": "sum(rate(rest_client_requests_total{job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"5..\"}[5m]))", +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", @@ -101,8 +101,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", +- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", @@ -110,8 +110,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", +- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", @@ -119,8 +119,8 @@ "steppedLine": false, "targets": [ { -- "expr": "process_resident_memory_bytes{job=\"kube-proxy\",instance=~\"$instance\"}", -+ "expr": "process_resident_memory_bytes{job=\"{{ include "exporter.kubeProxy.jobName" . }}\",instance=~\"$instance\"}", +- "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", ++ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -128,8 +128,8 @@ "steppedLine": false, "targets": [ { -- "expr": "rate(process_cpu_seconds_total{job=\"kube-proxy\",instance=~\"$instance\"}[5m])", -+ "expr": "rate(process_cpu_seconds_total{job=\"{{ include "exporter.kubeProxy.jobName" . }}\",instance=~\"$instance\"}[5m])", +- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}[5m])", ++ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\",instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -137,21 +137,21 @@ "steppedLine": false, "targets": [ { -- "expr": "go_goroutines{job=\"kube-proxy\",instance=~\"$instance\"}", -+ "expr": "go_goroutines{job=\"{{ include "exporter.kubeProxy.jobName" . }}\",instance=~\"$instance\"}", +- "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", ++ "expr": "go_goroutines{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . 
}}\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -1180,7 +1185,7 @@ +@@ -1206,7 +1211,7 @@ "options": [ ], -- "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{job=\"kube-proxy\"}, instance)", -+ "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{job=\"{{ include "exporter.kubeProxy.jobName" . }}\"}, instance)", +- "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\"}, instance)", ++ "query": "label_values(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeProxy.jobName" . }}\"}, instance)", "refresh": 2, "regex": "", "sort": 1, -@@ -1228,4 +1233,4 @@ +@@ -1254,4 +1259,4 @@ "uid": "632e265de029684c40b21cb76bca4f94", "version": 0 } diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/scheduler.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/scheduler.yaml.patch index e052d23630..dac1a5db5f 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/scheduler.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/scheduler.yaml.patch @@ -4,8 +4,8 @@ https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack */ -}} {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} --{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled .Values.kubeScheduler.enabled }} -+{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} +-{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeScheduler.enabled }} ++{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- if (include "exporter.kubeScheduler.enabled" .)}} apiVersion: v1 kind: ConfigMap @@ -19,11 +19,11 @@ "tableColumn": "", "targets": [ { -- "expr": "sum(up{job=\"kube-scheduler\"})", +- "expr": "sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})", + {{- if .Values.k3sServer.enabled }} -+ "expr": "sum(up{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", metrics_path=\"/metrics\"})", ++ "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", metrics_path=\"/metrics\"})", + {{- else }} -+ "expr": "sum(up{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"})", ++ "expr": "sum(up{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"})", + {{- end }} "format": "time_series", "intervalFactor": 2, @@ -32,32 +32,32 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\", instance=~\"$instance\"}[5m])) by (instance)", +- "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", ++ "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} e2e", "refId": "A" }, { -- "expr": "sum(rate(scheduler_binding_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(scheduler_binding_duration_seconds_count{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", +- "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", ++ "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} binding", "refId": "B" }, { -- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", +- "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", ++ "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm", "refId": "C" }, { -- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", -+ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", +- "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])) by (instance)", ++ "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} volume", @@ -65,32 +65,32 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\",instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} e2e", "refId": "A" }, { -- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} binding", "refId": "B" }, { -- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} scheduling algorithm", "refId": "C" }, { -- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}[5m])) by (instance, le))", +- "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[5m])) by (instance, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\",instance=~\"$instance\"}[5m])) by (instance, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}} volume", @@ -98,32 +98,32 @@ "steppedLine": false, "targets": [ { -- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"2..\"}[5m]))", +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"2..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "2xx", "refId": "A" }, { -- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"3..\"}[5m]))", +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"3..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "3xx", "refId": "B" }, { -- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"4..\"}[5m]))", +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"4..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "4xx", "refId": "C" }, { -- "expr": "sum(rate(rest_client_requests_total{job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))", -+ "expr": "sum(rate(rest_client_requests_total{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"5..\"}[5m]))", +- "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[5m]))", ++ "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\",code=~\"5..\"}[5m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "5xx", @@ -131,8 +131,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", +- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", @@ -140,8 +140,8 @@ "steppedLine": false, "targets": [ { -- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", -+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", +- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", ++ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}verb{{`}}`}} {{`{{`}}url{{`}}`}}", @@ -149,8 +149,8 @@ "steppedLine": false, "targets": [ { -- "expr": "process_resident_memory_bytes{job=\"kube-scheduler\", instance=~\"$instance\"}", -+ "expr": "process_resident_memory_bytes{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}", +- "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}", ++ "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -158,8 +158,8 @@ "steppedLine": false, "targets": [ { -- "expr": "rate(process_cpu_seconds_total{job=\"kube-scheduler\", instance=~\"$instance\"}[5m])", -+ "expr": "rate(process_cpu_seconds_total{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])", +- "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[5m])", ++ "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\", instance=~\"$instance\"}[5m])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", @@ -167,21 +167,21 @@ "steppedLine": false, "targets": [ { -- "expr": "go_goroutines{job=\"kube-scheduler\",instance=~\"$instance\"}", -+ "expr": "go_goroutines{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\",instance=~\"$instance\"}", +- "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}", ++ "expr": "go_goroutines{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . 
}}\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{`{{`}}instance{{`}}`}}", -@@ -1023,7 +1028,7 @@ +@@ -1049,7 +1054,7 @@ "options": [ ], -- "query": "label_values(process_cpu_seconds_total{job=\"kube-scheduler\"}, instance)", -+ "query": "label_values(process_cpu_seconds_total{job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"}, instance)", +- "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\"}, instance)", ++ "query": "label_values(process_cpu_seconds_total{cluster=\"$cluster\", job=\"{{ include "exporter.kubeScheduler.jobName" . }}\"}, instance)", "refresh": 2, "regex": "", "sort": 1, -@@ -1071,4 +1076,5 @@ +@@ -1097,4 +1102,5 @@ "uid": "2e6b6a3b4bddf1427b3a55aa1311c656", "version": 0 } diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/statefulset.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/statefulset.yaml.patch index 5d3adedf80..a607cdf2e6 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/statefulset.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/statefulset.yaml.patch @@ -36,3 +36,10 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "", +@@ -925,4 +925,4 @@ + "uid": "a31c1f46e6f727cb37c0d731a7245005", + "version": 0 + } +-{{- end }} ++{{- end }} +\ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/workload-total.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/workload-total.yaml.patch index 87ad827e68..b8a643abda 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/workload-total.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards-1.14/workload-total.yaml.patch @@ -9,19 +9,3 @@ name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }} annotations: {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -1205,7 +1205,7 @@ - "datasource": "$datasource", - "hide": {{ if .Values.grafana.sidecar.dashboards.multicluster }}0{{ else }}2{{ end }}, - "includeAll": false, -- "label": null, -+ "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ -@@ -1435,4 +1435,4 @@ - "uid": "728bf77cc1166d2f3133bf25846876cc", - "version": 0 - } --{{- end }} -\ No newline at end of file -+{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/etcd.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/etcd.yaml.patch deleted file mode 100644 index 59cd599f5e..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/etcd.yaml.patch +++ /dev/null @@ -1,24 +0,0 @@ ---- charts-original/templates/grafana/dashboards/etcd.yaml -+++ charts/templates/grafana/dashboards/etcd.yaml -@@ -4,11 +4,12 @@ - https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack - */ -}} - {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} --{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled 
.Values.grafana.defaultDashboardsEnabled .Values.kubeEtcd.enabled }} -+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.grafana.enabled .Values.grafana.defaultDashboardsEnabled }} -+{{- if (include "exporter.kubeEtcd.enabled" .)}} - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -1113,4 +1114,5 @@ - "uid": "c2f4e12cdf69feb95caa41a5a1b423d9", - "version": 215 - } -+{{- end }} - {{- end }} -\ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-cluster-rsrc-use.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-cluster-rsrc-use.yaml.patch deleted file mode 100644 index 0fac015597..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-cluster-rsrc-use.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/grafana/dashboards/k8s-cluster-rsrc-use.yaml -+++ charts/templates/grafana/dashboards/k8s-cluster-rsrc-use.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-node-rsrc-use.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-node-rsrc-use.yaml.patch deleted file mode 100644 index 3c714730d3..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-node-rsrc-use.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/grafana/dashboards/k8s-node-rsrc-use.yaml -+++ charts/templates/grafana/dashboards/k8s-node-rsrc-use.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-node-rsrc-use" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-cluster.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-cluster.yaml.patch deleted file mode 100644 index 183b03b689..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-cluster.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/grafana/dashboards/k8s-resources-cluster.yaml -+++ charts/templates/grafana/dashboards/k8s-resources-cluster.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . 
}} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-namespace.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-namespace.yaml.patch deleted file mode 100644 index d5f97650d4..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-namespace.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/grafana/dashboards/k8s-resources-namespace.yaml -+++ charts/templates/grafana/dashboards/k8s-resources-namespace.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-pod.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-pod.yaml.patch deleted file mode 100644 index 2de5720366..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-pod.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/grafana/dashboards/k8s-resources-pod.yaml -+++ charts/templates/grafana/dashboards/k8s-resources-pod.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-workload.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-workload.yaml.patch deleted file mode 100644 index 61304b64d3..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-workload.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/grafana/dashboards/k8s-resources-workload.yaml -+++ charts/templates/grafana/dashboards/k8s-resources-workload.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . 
}} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-workloads-namespace.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-workloads-namespace.yaml.patch deleted file mode 100644 index 8e0cb55d61..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/k8s-resources-workloads-namespace.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/grafana/dashboards/k8s-resources-workloads-namespace.yaml -+++ charts/templates/grafana/dashboards/k8s-resources-workloads-namespace.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/nodes.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/nodes.yaml.patch deleted file mode 100644 index 6c711730ad..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/nodes.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/grafana/dashboards/nodes.yaml -+++ charts/templates/grafana/dashboards/nodes.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/persistentvolumesusage.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/persistentvolumesusage.yaml.patch deleted file mode 100644 index 98c1a55900..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/persistentvolumesusage.yaml.patch +++ /dev/null @@ -1,81 +0,0 @@ ---- charts-original/templates/grafana/dashboards/persistentvolumesusage.yaml -+++ charts/templates/grafana/dashboards/persistentvolumesusage.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . 
}} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -90,14 +90,14 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", -+ "expr": "(\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used Space", - "refId": "A" - }, - { -- "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", -+ "expr": "sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Free Space", -@@ -205,7 +205,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", -+ "expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", -@@ -285,14 +285,14 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", -+ "expr": "sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . 
}}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used inodes", - "refId": "A" - }, - { -- "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", -+ "expr": "(\n sum without(instance, node) (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n sum without(instance, node) (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": " Free inodes", -@@ -400,7 +400,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", -+ "expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", -@@ -496,7 +496,7 @@ - "options": [ - - ], -- "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", -+ "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\"}, namespace)", - "refresh": 2, - "regex": "", - "sort": 0, -@@ -522,7 +522,7 @@ - "options": [ - - ], -- "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", -+ "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\"}, persistentvolumeclaim)", - "refresh": 2, - "regex": "", - "sort": 0, diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/pods.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/pods.yaml.patch deleted file mode 100644 index b18823175f..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/pods.yaml.patch +++ /dev/null @@ -1,55 +0,0 @@ ---- charts-original/templates/grafana/dashboards/pods.yaml -+++ charts/templates/grafana/dashboards/pods.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . 
}} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pods" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -103,7 +103,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum by(container_name) (container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", -+ "expr": "sum by(container_name) (container_memory_usage_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current: {{`{{`}} container_name {{`}}`}}", -@@ -124,7 +124,7 @@ - "refId": "C" - }, - { -- "expr": "sum by(container_name) (container_memory_cache{job=\"kubelet\", namespace=\"$namespace\", pod_name=~\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", -+ "expr": "sum by(container_name) (container_memory_cache{job=\"{{ include "exporter.kubelet.jobName" . }}\", namespace=\"$namespace\", pod_name=~\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Cache: {{`{{`}} container_name {{`}}`}}", -@@ -228,7 +228,7 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"}[1m]))", -+ "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current: {{`{{`}} container_name {{`}}`}}", -@@ -346,14 +346,14 @@ - "steppedLine": false, - "targets": [ - { -- "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", -+ "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "RX: {{`{{`}} pod_name {{`}}`}}", - "refId": "A" - }, - { -- "expr": "sort_desc(sum by (pod_name) (rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", -+ "expr": "sort_desc(sum by (pod_name) (rate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=\"$pod\"}[1m])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "TX: {{`{{`}} pod_name {{`}}`}}", diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/statefulset.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/statefulset.yaml.patch deleted file mode 100644 index b584811ed8..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/grafana/dashboards/statefulset.yaml.patch +++ /dev/null @@ -1,38 +0,0 @@ ---- charts-original/templates/grafana/dashboards/statefulset.yaml -+++ charts/templates/grafana/dashboards/statefulset.yaml -@@ -8,7 +8,7 @@ - apiVersion: v1 - kind: ConfigMap - metadata: -- namespace: {{ template "kube-prometheus-stack.namespace" . }} -+ namespace: {{ .Values.grafana.defaultDashboards.namespace }} - name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "statefulset" | trunc 63 | trimSuffix "-" }} - annotations: - {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} -@@ -106,7 +106,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m]))", -+ "expr": "sum(rate(container_cpu_usage_seconds_total{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", -@@ -189,7 +189,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(container_memory_usage_bytes{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}) / 1024^3", -+ "expr": "sum(container_memory_usage_bytes{job=\"{{ include "exporter.kubelet.jobName" . }}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}) / 1024^3", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", -@@ -272,7 +272,7 @@ - "tableColumn": "", - "targets": [ - { -- "expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod_name=~\"$statefulset.*\"}[3m]))", -+ "expr": "sum(rate(container_network_transmit_bytes_total{job=\"{{ include "exporter.kubelet.jobName" . 
}}\", cluster=\"$cluster\", namespace=\"$namespace\", pod_name=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod_name=~\"$statefulset.*\"}[3m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml.patch index e7218cb537..fc7d98bcff 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml.patch @@ -12,7 +12,7 @@ {{- end }} imagePullPolicy: {{ .Values.prometheusOperator.admissionWebhooks.patch.image.pullPolicy }} args: -@@ -46,18 +46,18 @@ +@@ -46,16 +46,16 @@ {{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }} restartPolicy: OnFailure serviceAccountName: {{ template "kube-prometheus-stack.fullname" . }}-admission @@ -32,8 +32,5 @@ + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- with .Values.prometheusOperator.admissionWebhooks.patch.tolerations }} {{ toYaml . | indent 8 }} -- {{- end }} -+{{- end }} - securityContext: - runAsGroup: 2000 - runAsNonRoot: true + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.patch.securityContext }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml.patch index e203592d54..268aa50d49 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml.patch @@ -12,7 +12,7 @@ {{- end }} imagePullPolicy: {{ .Values.prometheusOperator.admissionWebhooks.patch.image.pullPolicy }} args: -@@ -47,18 +47,18 @@ +@@ -47,16 +47,16 @@ {{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }} restartPolicy: OnFailure serviceAccountName: {{ template "kube-prometheus-stack.fullname" . }}-admission @@ -32,8 +32,5 @@ + tolerations: {{ include "linux-node-tolerations" . | nindent 8 }} +{{- with .Values.prometheusOperator.admissionWebhooks.patch.tolerations }} {{ toYaml . 
| indent 8 }} -- {{- end }} -+{{- end }} - securityContext: - runAsGroup: 2000 - runAsNonRoot: true + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.patch.securityContext }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/prometheus.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/prometheus.yaml.patch index 9cd58343e0..bc12744176 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/prometheus.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/prometheus.yaml.patch @@ -9,7 +9,7 @@ version: {{ .Values.prometheus.prometheusSpec.image.tag }} {{- if .Values.prometheus.prometheusSpec.image.sha }} sha: {{ .Values.prometheus.prometheusSpec.image.sha }} -@@ -56,11 +56,16 @@ +@@ -56,11 +56,13 @@ externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}" {{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }} externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . }}{{ .Values.prometheus.prometheusSpec.routePrefix }}" @@ -18,16 +18,13 @@ {{- else }} externalUrl: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.prometheus.service.port }} {{- end }} -+{{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} -+ ignoreNamespaceSelectors: {{ .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} -+{{- end }} + nodeSelector: {{ include "linux-node-selector" . | nindent 4 }} {{- if .Values.prometheus.prometheusSpec.nodeSelector }} - nodeSelector: {{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }} {{- end }} paused: {{ .Values.prometheus.prometheusSpec.paused }} -@@ -216,8 +221,8 @@ +@@ -232,8 +234,8 @@ - {key: prometheus, operator: In, values: [{{ template "kube-prometheus-stack.fullname" . 
}}-prometheus]} {{- end }} {{- end }} @@ -37,7 +34,7 @@ {{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }} {{- end }} {{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }} -@@ -250,7 +255,7 @@ +@@ -266,7 +268,7 @@ {{- end }} {{- if .Values.prometheus.prometheusSpec.containers }} containers: @@ -46,7 +43,7 @@ {{- end }} {{- if .Values.prometheus.prometheusSpec.initContainers }} initContainers: -@@ -266,6 +271,7 @@ +@@ -282,6 +284,7 @@ {{- if .Values.prometheus.prometheusSpec.disableCompaction }} disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }} {{- end }} @@ -54,7 +51,7 @@ portName: {{ .Values.prometheus.prometheusSpec.portName }} {{- end }} {{- if .Values.prometheus.prometheusSpec.volumes }} -@@ -310,3 +316,4 @@ +@@ -326,3 +329,4 @@ {{- if .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} {{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/etcd.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/etcd.yaml.patch index 1ace708ae3..e8b7b58112 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/etcd.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/etcd.yaml.patch @@ -14,7 +14,6 @@ {{- if .Values.defaultRules.additionalRuleLabels }} {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} {{- end }} --{{- end }} -\ No newline at end of file -+{{- end }} +{{- end }} + {{- end }} +\ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/k8s.rules.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/k8s.rules.yaml.patch index fa43f0aeb4..fa81d2ae64 100644 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/k8s.rules.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules-1.14/k8s.rules.yaml.patch @@ -4,8 +4,8 @@ rules: - expr: |- sum by (cluster, namespace, pod, container) ( -- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]) -+ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m]) +- rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m]) ++ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . 
}}", metrics_path="/metrics/cadvisor", image!=""}[5m]) ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}) ) diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/etcd.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/etcd.yaml.patch deleted file mode 100644 index 3cd6667539..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/etcd.yaml.patch +++ /dev/null @@ -1,20 +0,0 @@ ---- charts-original/templates/prometheus/rules/etcd.yaml -+++ charts/templates/prometheus/rules/etcd.yaml -@@ -4,7 +4,8 @@ - https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack - */ -}} - {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} --{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }} -+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.etcd }} -+{{- if (include "exporter.kubeEtcd.enabled" .)}} - apiVersion: monitoring.coreos.com/v1 - kind: PrometheusRule - metadata: -@@ -176,4 +177,5 @@ - {{- if .Values.defaultRules.additionalRuleLabels }} - {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} - {{- end }} --{{- end }} -\ No newline at end of file -+{{- end }} -+{{- end }} diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/k8s.rules.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/k8s.rules.yaml.patch deleted file mode 100644 index 1ece223d04..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/k8s.rules.yaml.patch +++ /dev/null @@ -1,19 +0,0 @@ ---- charts-original/templates/prometheus/rules/k8s.rules.yaml -+++ charts/templates/prometheus/rules/k8s.rules.yaml -@@ -24,13 +24,13 @@ - groups: - - name: k8s.rules - rules: -- - expr: sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m])) by (namespace) -+ - expr: sum(rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . }}", image!="", container_name!=""}[5m])) by (namespace) - record: namespace:container_cpu_usage_seconds_total:sum_rate -- - expr: sum(container_memory_usage_bytes{job="kubelet", image!="", container_name!=""}) by (namespace) -+ - expr: sum(container_memory_usage_bytes{job="{{ include "exporter.kubelet.jobName" . }}", image!="", container_name!=""}) by (namespace) - record: namespace:container_memory_usage_bytes:sum - - expr: |- - sum by (namespace, pod_name, container_name) ( -- rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container_name!=""}[5m]) -+ rate(container_cpu_usage_seconds_total{job="{{ include "exporter.kubelet.jobName" . 
}}", image!="", container_name!=""}[5m]) - ) - record: namespace_pod_name_container_name:container_cpu_usage_seconds_total:sum_rate - - expr: |- diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kube-scheduler.rules.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kube-scheduler.rules.yaml.patch deleted file mode 100644 index c0f6a87512..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kube-scheduler.rules.yaml.patch +++ /dev/null @@ -1,64 +0,0 @@ ---- charts-original/templates/prometheus/rules/kube-scheduler.rules.yaml -+++ charts/templates/prometheus/rules/kube-scheduler.rules.yaml -@@ -4,7 +4,8 @@ - https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack - */ -}} - {{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} --{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeScheduler }} -+{{- if and (semverCompare ">=1.10.0-0" $kubeTargetVersion) (semverCompare "<1.14.0-0" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubeScheduler }} -+{{- if (include "exporter.kubeScheduler.enabled" .)}} - apiVersion: monitoring.coreos.com/v1 - kind: PrometheusRule - metadata: -@@ -24,40 +25,41 @@ - groups: - - name: kube-scheduler.rules - rules: -- - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile -- - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile -- - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.99, sum(rate(scheduler_binding_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.99' - record: cluster_quantile:scheduler_binding_latency:histogram_quantile -- - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.9, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . 
}}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile -- - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile -- - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.9, sum(rate(scheduler_binding_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.9' - record: cluster_quantile:scheduler_binding_latency:histogram_quantile -- - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.5, sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_e2e_scheduling_latency:histogram_quantile -- - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_scheduling_algorithm_latency:histogram_quantile -- - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="kube-scheduler"}[5m])) without(instance, pod)) / 1e+06 -+ - expr: histogram_quantile(0.5, sum(rate(scheduler_binding_latency_microseconds_bucket{job="{{ include "exporter.kubeScheduler.jobName" . }}"}[5m])) without(instance, pod)) / 1e+06 - labels: - quantile: '0.5' - record: cluster_quantile:scheduler_binding_latency:histogram_quantile -+{{- end }} - {{- end }} -\ No newline at end of file diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-absent.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-absent.yaml.patch deleted file mode 100644 index ae7564d593..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-absent.yaml.patch +++ /dev/null @@ -1,47 +0,0 @@ ---- charts-original/templates/prometheus/rules/kubernetes-absent.yaml -+++ charts/templates/prometheus/rules/kubernetes-absent.yaml -@@ -67,12 +67,12 @@ - {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} - {{- end }} - {{- end }} --{{- if .Values.kubeControllerManager.enabled }} -+{{- if (include "exporter.kubeControllerManager.enabled" .)}} - - alert: KubeControllerManagerDown - annotations: - message: KubeControllerManager has disappeared from Prometheus target discovery. 
- runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubecontrollermanagerdown -- expr: absent(up{job="kube-controller-manager"} == 1) -+ expr: absent(up{job="{{ include "exporter.kubeControllerManager.jobName" . }}"} == 1) - for: 15m - labels: - severity: critical -@@ -80,12 +80,12 @@ - {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} - {{- end }} - {{- end }} --{{- if .Values.kubeScheduler.enabled }} -+{{- if (include "exporter.kubeScheduler.enabled" .)}} - - alert: KubeSchedulerDown - annotations: - message: KubeScheduler has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeschedulerdown -- expr: absent(up{job="kube-scheduler"} == 1) -+ expr: absent(up{job="{{ include "exporter.kubeScheduler.jobName" . }}"} == 1) - for: 15m - labels: - severity: critical -@@ -106,12 +106,12 @@ - {{ toYaml .Values.defaultRules.additionalRuleLabels | indent 8 }} - {{- end }} - {{- end }} --{{- if .Values.prometheusOperator.kubeletService.enabled }} -+{{- if (include "exporter.kubeletService.enabled" .) }} - - alert: KubeletDown - annotations: - message: Kubelet has disappeared from Prometheus target discovery. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubeletdown -- expr: absent(up{job="kubelet"} == 1) -+ expr: absent(up{job="{{ include "exporter.kubelet.jobName" . }}"} == 1) - for: 15m - labels: - severity: critical diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-storage.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-storage.yaml.patch deleted file mode 100644 index 046ede3c2d..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-storage.yaml.patch +++ /dev/null @@ -1,30 +0,0 @@ ---- charts-original/templates/prometheus/rules/kubernetes-storage.yaml -+++ charts/templates/prometheus/rules/kubernetes-storage.yaml -@@ -30,9 +30,9 @@ - message: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} is only {{`{{`}} printf "%0.2f" $value {{`}}`}}% free. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumeusagecritical - expr: |- -- 100 * kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} -+ 100 * kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"} - / -- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} -+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"} - < 3 - for: 1m - labels: -@@ -46,12 +46,12 @@ - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubepersistentvolumefullinfourdays - expr: |- - 100 * ( -- kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} -+ kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"} - / -- kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"} -+ kubelet_volume_stats_capacity_bytes{job="{{ include "exporter.kubelet.jobName" . 
}}", namespace=~"{{ $targetNamespace }}"} - ) < 15 - and -- predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0 -+ predict_linear(kubelet_volume_stats_available_bytes{job="{{ include "exporter.kubelet.jobName" . }}", namespace=~"{{ $targetNamespace }}"}[6h], 4 * 24 * 3600) < 0 - for: 5m - labels: - severity: critical diff --git a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-system.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-system.yaml.patch deleted file mode 100644 index d7a261b063..0000000000 --- a/packages/rancher-monitoring/generated-changes/patch/templates/prometheus/rules/kubernetes-system.yaml.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- charts-original/templates/prometheus/rules/kubernetes-system.yaml -+++ charts/templates/prometheus/rules/kubernetes-system.yaml -@@ -76,7 +76,7 @@ - annotations: - message: Kubelet {{`{{`}} $labels.instance {{`}}`}} is running {{`{{`}} $value {{`}}`}} Pods, close to the limit of 110. - runbook_url: {{ .Values.defaultRules.runbookUrl }}alert-name-kubelettoomanypods -- expr: kubelet_running_pod_count{job="kubelet"} > 110 * 0.9 -+ expr: kubelet_running_pod_count{job="{{ include "exporter.kubelet.jobName" . }}"} > 110 * 0.9 - for: 15m - labels: - severity: warning diff --git a/packages/rancher-monitoring/generated-changes/patch/values.yaml.patch b/packages/rancher-monitoring/generated-changes/patch/values.yaml.patch index 75d4ac9fed..7f5f88d989 100644 --- a/packages/rancher-monitoring/generated-changes/patch/values.yaml.patch +++ b/packages/rancher-monitoring/generated-changes/patch/values.yaml.patch @@ -1,6 +1,6 @@ --- charts-original/values.yaml +++ charts/values.yaml -@@ -2,13 +2,423 @@ +@@ -2,13 +2,427 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. @@ -393,6 +393,10 @@ + ## + interval: "" + ++ ## proxyUrl: URL of a proxy that should be used for scraping. ++ ## ++ proxyUrl: "" ++ + ## metric relabel configs to apply to samples before ingestion. + ## + metricRelabelings: [] @@ -426,7 +430,7 @@ ## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.16.6 ## -@@ -89,8 +499,32 @@ +@@ -93,8 +507,32 @@ ## global: @@ -459,30 +463,7 @@ pspEnabled: true pspAnnotations: {} ## Specify pod annotations -@@ -143,6 +577,22 @@ - ## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file - ## https://prometheus.io/webtools/alerting/routing-tree-editor/ - ## -+ ## Example Slack Config -+ ## config: -+ ## route: -+ ## group_by: ['job'] -+ ## group_wait: 30s -+ ## group_interval: 5m -+ ## repeat_interval: 3h -+ ## receiver: 'slack-notifications' -+ ## receivers: -+ ## - name: 'slack-notifications' -+ ## slack_configs: -+ ## - send_resolved: true -+ ## text: '{{ template "slack.rancher.text" . 
}}' -+ ## api_url: -+ ## templates: -+ ## - /etc/alertmanager/config/*.tmpl - config: - global: - resolve_timeout: 5m -@@ -179,25 +629,76 @@ +@@ -187,25 +625,76 @@ ## ref: https://prometheus.io/docs/alerting/notifications/ ## https://prometheus.io/docs/alerting/notification_examples/ ## @@ -501,7 +482,7 @@ - # *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:> - # *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:> - # *Details:* -- # {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` +- # {{ range .Labels.SortedPairs }} - *{{ .Name }}:* `{{ .Value }}` - # {{ end }} - # {{ end }} - # {{ end }} @@ -578,7 +559,7 @@ ingress: enabled: false -@@ -235,6 +736,25 @@ +@@ -243,6 +732,25 @@ ## Configuration for Alertmanager secret ## secret: @@ -604,25 +585,16 @@ annotations: {} ## Configuration for creating an Ingress that will map to each Alertmanager replica service -@@ -352,7 +872,7 @@ - ## Of type: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig - tlsConfig: {} - -- bearerTokenFile: -+ bearerTokenFile: "" - - ## metric relabel configs to apply to samples before ingestion. - ## -@@ -383,7 +903,7 @@ +@@ -395,7 +903,7 @@ ## Image of Alertmanager ## image: - repository: quay.io/prometheus/alertmanager -+ repository: rancher/mirrored-prom-alertmanager - tag: v0.21.0 ++ repository: rancher/mirrored-prometheus-alertmanager + tag: v0.22.2 sha: "" -@@ -495,9 +1015,13 @@ +@@ -507,9 +1015,13 @@ ## Define resources requests and limits for single Pods. ## ref: https://kubernetes.io/docs/user-guide/compute-resources/ ## @@ -639,7 +611,7 @@ ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node. ## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. -@@ -601,10 +1125,46 @@ +@@ -613,6 +1125,30 @@ enabled: true namespaceOverride: "" @@ -667,7 +639,10 @@ + deploymentStrategy: + type: Recreate + - ## Deploy default dashboards. 
+ ## ForceDeployDatasources Create datasource configmap even if grafana deployment has been disabled + ## + forceDeployDatasources: false +@@ -625,6 +1161,18 @@ ## defaultDashboardsEnabled: true @@ -686,7 +661,7 @@ adminPassword: prom-operator ingress: -@@ -644,6 +1204,7 @@ +@@ -664,6 +1212,7 @@ dashboards: enabled: true label: grafana_dashboard @@ -694,7 +669,7 @@ ## Annotations for Grafana dashboard configmaps ## -@@ -692,7 +1253,60 @@ +@@ -716,7 +1265,60 @@ ## Passed to grafana subchart and used by servicemonitor below ## service: @@ -756,7 +731,7 @@ ## If true, create a serviceMonitor for grafana ## -@@ -722,6 +1336,14 @@ +@@ -746,6 +1348,14 @@ # targetLabel: nodename # replacement: $1 # action: replace @@ -771,7 +746,7 @@ ## Component scraping the kube api server ## -@@ -879,7 +1501,7 @@ +@@ -907,7 +1517,7 @@ ## Component scraping the kube controller manager ## kubeControllerManager: @@ -780,7 +755,7 @@ ## If your kube controller manager is not deployed as a pod, specify IPs it can be found on ## -@@ -1014,7 +1636,7 @@ +@@ -1054,7 +1664,7 @@ ## Component scraping etcd ## kubeEtcd: @@ -789,7 +764,7 @@ ## If your etcd is not deployed as a pod, specify IPs it can be found on ## -@@ -1076,7 +1698,7 @@ +@@ -1119,7 +1729,7 @@ ## Component scraping kube scheduler ## kubeScheduler: @@ -798,7 +773,7 @@ ## If your kube scheduler is not deployed as a pod, specify IPs it can be found on ## -@@ -1131,7 +1753,7 @@ +@@ -1177,7 +1787,7 @@ ## Component scraping kube proxy ## kubeProxy: @@ -807,7 +782,7 @@ ## If your kube proxy is not deployed as a pod, specify IPs it can be found on ## -@@ -1210,6 +1832,13 @@ +@@ -1266,6 +1876,13 @@ create: true podSecurityPolicy: enabled: true @@ -821,7 +796,7 @@ ## Deploy node exporter as a daemonset to all nodes ## -@@ -1259,6 +1888,16 @@ +@@ -1319,6 +1936,16 @@ extraArgs: - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/) - --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ @@ -838,7 +813,7 @@ ## Manages Prometheus and Alertmanager components ## -@@ -1271,8 +1910,8 @@ +@@ -1331,8 +1958,8 @@ enabled: true # Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants tlsMinVersion: VersionTLS13 @@ -849,16 +824,16 @@ ## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted ## rules from making their way into prometheus and potentially preventing the container from starting -@@ -1289,7 +1928,7 @@ +@@ -1349,7 +1976,7 @@ patch: enabled: true image: - repository: jettech/kube-webhook-certgen + repository: rancher/mirrored-jettech-kube-webhook-certgen - tag: v1.5.0 + tag: v1.5.2 sha: "" pullPolicy: IfNotPresent -@@ -1428,13 +2067,13 @@ +@@ -1498,13 +2125,13 @@ ## Resource limits & requests ## @@ -879,40 +854,25 @@ # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working -@@ -1487,7 +2126,7 @@ +@@ -1557,7 +2184,7 @@ ## Prometheus-operator image ## image: - repository: quay.io/prometheus-operator/prometheus-operator + repository: rancher/mirrored-prometheus-operator-prometheus-operator - tag: v0.46.0 + tag: v0.48.0 sha: "" pullPolicy: IfNotPresent -@@ -1503,7 +2142,7 @@ +@@ -1573,7 
+2200,7 @@ ## Prometheus-config-reloader image to use for config and rule reloading ## prometheusConfigReloaderImage: - repository: quay.io/prometheus-operator/prometheus-config-reloader + repository: rancher/mirrored-prometheus-operator-prometheus-config-reloader - tag: v0.46.0 + tag: v0.48.0 sha: "" -@@ -1558,6 +2197,14 @@ - ## - nodePort: 30901 - -+ ## Service type -+ ## -+ type: ClusterIP -+ -+ ## Port to expose on each node -+ ## -+ nodePort: 30901 -+ - ## Configuration for Prometheus service - ## - service: -@@ -1570,7 +2217,7 @@ +@@ -1659,7 +2286,7 @@ port: 9090 ## To be used with a proxy extraContainer port @@ -921,16 +881,16 @@ ## List of IP addresses at which the Prometheus server service is available ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips -@@ -1822,7 +2469,7 @@ +@@ -1916,7 +2543,7 @@ ## Image of Prometheus. ## image: - repository: quay.io/prometheus/prometheus + repository: rancher/mirrored-prometheus-prometheus - tag: v2.24.0 + tag: v2.27.1 sha: "" -@@ -1885,6 +2532,11 @@ +@@ -1979,6 +2606,11 @@ ## externalUrl: "" @@ -942,7 +902,7 @@ ## Define which Nodes the Pods are scheduled on. ## ref: https://kubernetes.io/docs/user-guide/node-selection/ ## -@@ -1917,7 +2569,7 @@ +@@ -2011,7 +2643,7 @@ ## prometheus resource to be created with selectors based on values in the helm deployment, ## which will also match the PrometheusRule resources created ## @@ -951,7 +911,7 @@ ## PrometheusRules to be selected for target discovery. ## If {}, select all PrometheusRules -@@ -1942,7 +2594,7 @@ +@@ -2036,7 +2668,7 @@ ## prometheus resource to be created with selectors based on values in the helm deployment, ## which will also match the servicemonitors created ## @@ -960,7 +920,7 @@ ## ServiceMonitors to be selected for target discovery. ## If {}, select all ServiceMonitors -@@ -1965,7 +2617,7 @@ +@@ -2059,7 +2691,7 @@ ## prometheus resource to be created with selectors based on values in the helm deployment, ## which will also match the podmonitors created ## @@ -969,7 +929,7 @@ ## PodMonitors to be selected for target discovery. ## If {}, select all PodMonitors -@@ -2092,9 +2744,13 @@ +@@ -2190,9 +2822,13 @@ ## Resource limits & requests ## @@ -986,7 +946,7 @@ ## Prometheus StorageSpec for persistent data ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/user-guides/storage.md -@@ -2117,7 +2773,13 @@ +@@ -2215,7 +2851,13 @@ # medium: Memory # Additional volumes on the output StatefulSet definition. @@ -1001,7 +961,7 @@ # Additional VolumeMounts on the output StatefulSet definition. volumeMounts: [] -@@ -2224,9 +2886,34 @@ +@@ -2322,9 +2964,34 @@ ## thanos: {} @@ -1037,7 +997,7 @@ ## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes ## (permissions, dir tree) on mounted volumes before starting prometheus -@@ -2234,7 +2921,7 @@ +@@ -2332,7 +2999,7 @@ ## PortName to use for Prometheus. 
## diff --git a/packages/rancher-monitoring/package.yaml b/packages/rancher-monitoring/package.yaml index c35618f8aa..59a93ece0f 100644 --- a/packages/rancher-monitoring/package.yaml +++ b/packages/rancher-monitoring/package.yaml @@ -1,6 +1,6 @@ url: https://github.com/prometheus-community/helm-charts.git subdirectory: charts/kube-prometheus-stack -commit: 3ca6ba66032a1efce0500f9ad6f83351ad0604b8 +commit: ba91bdb2d79ca4419cf72078f5f4bfcc426d4599 version: 100.0.0 additionalCharts: - workingDir: charts-crd diff --git a/packages/rancher-monitoring/templates/crd-template/Chart.yaml b/packages/rancher-monitoring/templates/crd-template/Chart.yaml index 8931de182c..d749806506 100644 --- a/packages/rancher-monitoring/templates/crd-template/Chart.yaml +++ b/packages/rancher-monitoring/templates/crd-template/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v1 -version: 14.5.1 +version: 16.6.0 description: Installs the CRDs for rancher-monitoring. name: rancher-monitoring-crd type: application diff --git a/packages/rancher-node-exporter/generated-changes/patch/Chart.yaml.patch b/packages/rancher-node-exporter/generated-changes/patch/Chart.yaml.patch index 7bab565d95..8a570e2aee 100644 --- a/packages/rancher-node-exporter/generated-changes/patch/Chart.yaml.patch +++ b/packages/rancher-node-exporter/generated-changes/patch/Chart.yaml.patch @@ -12,6 +12,6 @@ description: A Helm chart for prometheus node-exporter -name: prometheus-node-exporter +name: rancher-node-exporter - version: 1.16.2 + version: 1.18.1 home: https://github.com/prometheus/node_exporter/ sources: diff --git a/packages/rancher-node-exporter/generated-changes/patch/templates/daemonset.yaml.patch b/packages/rancher-node-exporter/generated-changes/patch/templates/daemonset.yaml.patch index ad9668dd37..c2844c29c8 100644 --- a/packages/rancher-node-exporter/generated-changes/patch/templates/daemonset.yaml.patch +++ b/packages/rancher-node-exporter/generated-changes/patch/templates/daemonset.yaml.patch @@ -1,7 +1,7 @@ --- charts-original/templates/daemonset.yaml +++ charts/templates/daemonset.yaml -@@ -31,7 +31,7 @@ - {{- end }} +@@ -35,7 +35,7 @@ + {{- end }} containers: - name: node-exporter - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" @@ -9,7 +9,7 @@ imagePullPolicy: {{ .Values.image.pullPolicy }} args: - --path.procfs=/host/proc -@@ -129,18 +129,18 @@ +@@ -133,18 +133,18 @@ affinity: {{ toYaml .Values.affinity | indent 8 }} {{- end }} diff --git a/packages/rancher-node-exporter/generated-changes/patch/values.yaml.patch b/packages/rancher-node-exporter/generated-changes/patch/values.yaml.patch index 07cdd90d35..af47a4e7a2 100644 --- a/packages/rancher-node-exporter/generated-changes/patch/values.yaml.patch +++ b/packages/rancher-node-exporter/generated-changes/patch/values.yaml.patch @@ -15,7 +15,7 @@ tag: v1.1.2 pullPolicy: IfNotPresent -@@ -125,6 +130,8 @@ +@@ -126,6 +131,8 @@ tolerations: - effect: NoSchedule operator: Exists diff --git a/packages/rancher-node-exporter/package.yaml b/packages/rancher-node-exporter/package.yaml index bad0a763ec..ab4afc92a2 100644 --- a/packages/rancher-node-exporter/package.yaml +++ b/packages/rancher-node-exporter/package.yaml @@ -1,4 +1,4 @@ url: https://github.com/prometheus-community/helm-charts.git subdirectory: charts/prometheus-node-exporter -commit: 78d1219306eaca5be1d8bab84445d0489e073840 +commit: a05f7b8888d6174827b815aa097d64b94f00af3e version: 100.0.0 diff --git a/packages/rancher-prometheus-adapter/generated-changes/patch/Chart.yaml.patch 
b/packages/rancher-prometheus-adapter/generated-changes/patch/Chart.yaml.patch index 05abc6f4d4..2d97a05e58 100644 --- a/packages/rancher-prometheus-adapter/generated-changes/patch/Chart.yaml.patch +++ b/packages/rancher-prometheus-adapter/generated-changes/patch/Chart.yaml.patch @@ -10,6 +10,6 @@ apiVersion: v1 -name: prometheus-adapter +name: rancher-prometheus-adapter - version: 2.12.1 - appVersion: v0.8.3 + version: 2.14.0 + appVersion: v0.8.4 description: A Helm chart for k8s prometheus adapter diff --git a/packages/rancher-prometheus-adapter/generated-changes/patch/values.yaml.patch b/packages/rancher-prometheus-adapter/generated-changes/patch/values.yaml.patch index 3e46011a58..d28b16a4ef 100644 --- a/packages/rancher-prometheus-adapter/generated-changes/patch/values.yaml.patch +++ b/packages/rancher-prometheus-adapter/generated-changes/patch/values.yaml.patch @@ -11,6 +11,6 @@ image: - repository: directxman12/k8s-prometheus-adapter-amd64 + repository: rancher/mirrored-directxman12-k8s-prometheus-adapter - tag: v0.8.3 + tag: v0.8.4 pullPolicy: IfNotPresent diff --git a/packages/rancher-prometheus-adapter/package.yaml b/packages/rancher-prometheus-adapter/package.yaml index 2acdb9c617..f7e7759146 100644 --- a/packages/rancher-prometheus-adapter/package.yaml +++ b/packages/rancher-prometheus-adapter/package.yaml @@ -1,4 +1,4 @@ url: https://github.com/prometheus-community/helm-charts.git subdirectory: charts/prometheus-adapter -commit: 75a88bfdb3cccb9a5b48148c0aee12e94d4128d4 +commit: 50b719af447594abbae7beeb4d6458a19f8e9689 version: 100.0.0 diff --git a/packages/rancher-windows-exporter/charts/Chart.yaml b/packages/rancher-windows-exporter/charts/Chart.yaml index 57c0888459..d828395f59 100755 --- a/packages/rancher-windows-exporter/charts/Chart.yaml +++ b/packages/rancher-windows-exporter/charts/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v1 -version: 0.1.0 -appVersion: 0.0.4 +version: 0.1.1 +appVersion: 0.0.2 annotations: catalog.rancher.io/certified: rancher catalog.rancher.io/namespace: cattle-monitoring-system diff --git a/packages/rancher-windows-exporter/charts/templates/_helpers.tpl b/packages/rancher-windows-exporter/charts/templates/_helpers.tpl index 4fc68cf975..16975d9d05 100644 --- a/packages/rancher-windows-exporter/charts/templates/_helpers.tpl +++ b/packages/rancher-windows-exporter/charts/templates/_helpers.tpl @@ -71,3 +71,43 @@ kubernetes.io/os: windows {{- end -}} {{- end -}} {{- end -}} + +{{- define "windowsExporter.renamedMetrics" -}} +{{- $renamed := dict -}} +{{/* v0.15.0 */}} +{{- $_ := set $renamed "windows_mssql_transactions_active_total" "windows_mssql_transactions_active" -}} +{{/* v0.16.0 */}} +{{- $_ := set $renamed "windows_adfs_ad_login_connection_failures" "windows_adfs_ad_login_connection_failures_total" -}} +{{- $_ := set $renamed "windows_adfs_certificate_authentications" "windows_adfs_certificate_authentications_total" -}} +{{- $_ := set $renamed "windows_adfs_device_authentications" "windows_adfs_device_authentications_total" -}} +{{- $_ := set $renamed "windows_adfs_extranet_account_lockouts" "windows_adfs_extranet_account_lockouts_total" -}} +{{- $_ := set $renamed "windows_adfs_federated_authentications" "windows_adfs_federated_authentications_total" -}} +{{- $_ := set $renamed "windows_adfs_passport_authentications" "windows_adfs_passport_authentications_total" -}} +{{- $_ := set $renamed "windows_adfs_password_change_failed" "windows_adfs_password_change_failed_total" -}} +{{- $_ := set $renamed "windows_adfs_password_change_succeeded" 
"windows_adfs_password_change_succeeded_total" -}} +{{- $_ := set $renamed "windows_adfs_token_requests" "windows_adfs_token_requests_total" -}} +{{- $_ := set $renamed "windows_adfs_windows_integrated_authentications" "windows_adfs_windows_integrated_authentications_total" -}} +{{- $_ := set $renamed "windows_net_packets_outbound_errors" "windows_net_packets_outbound_errors_total" -}} +{{- $_ := set $renamed "windows_net_packets_received_discarded" "windows_net_packets_received_discarded_total" -}} +{{- $_ := set $renamed "windows_net_packets_received_errors" "windows_net_packets_received_errors_total" -}} +{{- $_ := set $renamed "windows_net_packets_received_total" "windows_net_packets_received_total_total" -}} +{{- $_ := set $renamed "windows_net_packets_received_unknown" "windows_net_packets_received_unknown_total" -}} +{{- $_ := set $renamed "windows_dns_memory_used_bytes_total" "windows_dns_memory_used_bytes" -}} +{{- $renamed | toJson -}} +{{- end -}} + +{{- define "windowsExporter.renamedMetricsRelabeling" -}} +{{- range $original, $new := (include "windowsExporter.renamedMetrics" . | fromJson) -}} +- sourceLabels: [__name__] + regex: {{ $original }} + replacement: '{{ $new }}' + targetLabel: __name__ +{{ end -}} +{{- end -}} + +{{- define "windowsExporter.renamedMetricsRules" -}} +{{- range $original, $new := (include "windowsExporter.renamedMetrics" . | fromJson) -}} +- record: {{ $original }} + expr: {{ $new }} +{{ end -}} +{{- end -}} diff --git a/packages/rancher-windows-exporter/charts/templates/prometheusrule.yaml b/packages/rancher-windows-exporter/charts/templates/prometheusrule.yaml new file mode 100644 index 0000000000..f31983122a --- /dev/null +++ b/packages/rancher-windows-exporter/charts/templates/prometheusrule.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.prometheusRule .Values.clients }}{{- if and .Values.prometheusRule.enabled .Values.clients.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: {{ include "windowsExporter.labels" . | nindent 4 }} + name: {{ template "windowsExporter.name" . }} + namespace: {{ template "windowsExporter.namespace" . }} +spec: + groups: + - name: windows-exporter-relabel.rules + rules: +{{- include "windowsExporter.renamedMetricsRules" . | nindent 4 -}} +{{- end }}{{- end }} \ No newline at end of file diff --git a/packages/rancher-windows-exporter/charts/templates/servicemonitor.yaml b/packages/rancher-windows-exporter/charts/templates/servicemonitor.yaml index a2c2f0b54e..26ece9b05a 100644 --- a/packages/rancher-windows-exporter/charts/templates/servicemonitor.yaml +++ b/packages/rancher-windows-exporter/charts/templates/servicemonitor.yaml @@ -17,14 +17,11 @@ spec: endpoints: - port: windows-metrics metricRelabelings: +{{- include "windowsExporter.renamedMetricsRelabeling" . 
| nindent 4 -}} - sourceLabels: [__name__] regex: 'wmi_(.*)' replacement: 'windows_$1' targetLabel: __name__ - - sourceLabels: [__name__] - regex: windows_mssql_transactions_active_total - replacement: 'windows_mssql_transactions_active' - targetLabel: __name__ - sourceLabels: [volume, nic] regex: (.*);(.*) separator: '' diff --git a/packages/rancher-windows-exporter/charts/values.yaml b/packages/rancher-windows-exporter/charts/values.yaml index 6130890bd8..aa1fd19735 100755 --- a/packages/rancher-windows-exporter/charts/values.yaml +++ b/packages/rancher-windows-exporter/charts/values.yaml @@ -13,6 +13,10 @@ global: serviceMonitor: enabled: true +# Configure PrometheusRule that renames existing metrics +prometheusRule: + enabled: true + ## Components scraping metrics from Windows nodes ## clients: @@ -21,7 +25,7 @@ clients: port: 9796 image: repository: rancher/windows_exporter-package - tag: v0.0.1 + tag: v0.0.2 os: "windows" # Specify the IP addresses of nodes that you want to collect metrics from
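Note on the rancher-windows-exporter hunks above: the new `windowsExporter.renamedMetrics` helper is a single old-name → new-name map that drives both the ServiceMonitor `metricRelabelings` (normalizing scraped series to the new windows_exporter names) and the new PrometheusRule (re-recording the old names as aliases so existing queries keep working). The following is a minimal, illustrative Python sketch of that expansion — it hand-copies only a few entries of the map and mirrors the template logic from `_helpers.tpl`; it is not part of the chart itself.

```python
#!/usr/bin/env python3
"""Illustrative sketch of what the windowsExporter.renamedMetrics helpers expand to."""
import json

# Trimmed-down copy of the rename map (old metric name -> new metric name).
RENAMED_METRICS = {
    # v0.15.0
    "windows_mssql_transactions_active_total": "windows_mssql_transactions_active",
    # v0.16.0
    "windows_net_packets_outbound_errors": "windows_net_packets_outbound_errors_total",
    "windows_dns_memory_used_bytes_total": "windows_dns_memory_used_bytes",
}


def relabelings(renamed):
    """ServiceMonitor metricRelabelings: rewrite old scrape names to the new ones."""
    return [
        {
            "sourceLabels": ["__name__"],
            "regex": old,
            "replacement": new,
            "targetLabel": "__name__",
        }
        for old, new in renamed.items()
    ]


def recording_rules(renamed):
    """PrometheusRule records: keep old names queryable as aliases of the new ones."""
    return [{"record": old, "expr": new} for old, new in renamed.items()]


if __name__ == "__main__":
    print(json.dumps(
        {"metricRelabelings": relabelings(RENAMED_METRICS),
         "rules": recording_rules(RENAMED_METRICS)},
        indent=2,
    ))
```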