From c056e85bf2923c24b8718983562be552d318ed82 Mon Sep 17 00:00:00 2001 From: you06 Date: Mon, 16 May 2022 13:54:37 +0800 Subject: [PATCH 1/4] add latency for loading region Signed-off-by: you06 add load region ok panel Signed-off-by: you06 update client-go Signed-off-by: you06 update client-go Signed-off-by: you06 --- metrics/grafana/tidb.json | 190 +++++++++++++++++++++++++++++++++++++- 1 file changed, 188 insertions(+), 2 deletions(-) diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index a9bbf1ab4b83f..2b73992322800 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -12380,7 +12380,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB region cache operations count", + "description": "TiDB successful region cache operations count", "fill": 1, "gridPos": { "h": 7, @@ -12410,6 +12410,93 @@ "spaceLength": 10, "stack": false, "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tidb_tikvclient_region_cache_operations_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result=\"ok\"}[1m])) by (type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A", + "step": 30 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Region Cache OK OPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource":
"${DS_TEST-CLUSTER}", + "description": "TiDB error region cache operations count", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 249, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "sum(rate(tidb_tikvclient_region_cache_operations_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result=\"err\"}[1m])) by (type)", @@ -12461,6 +12548,105 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "TiDB loading region cache durations", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 250, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tidb_tikvclient_load_region_cache_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + 
"refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load Region Duration 99", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -12473,7 +12659,7 @@ "h": 7, "w": 12, "x": 12, - "y": 21 + "y": 28 }, "id": 52, "legend": { From e4dc22a6d728de26ec1412601f650ba585bb7ab2 Mon Sep 17 00:00:00 2001 From: you06 Date: Mon, 13 Jun 2022 17:15:45 +0800 Subject: [PATCH 2/4] add avg duration Signed-off-by: you06 --- metrics/grafana/tidb.json | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index 2b73992322800..80cc6a24aa651 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -6108,7 +6108,7 @@ "x": 12, "y": 98 }, - "id": 249, + "id": 250, "legend": { "alignAsTable": true, "avg": true, @@ -12568,7 +12568,7 @@ "y": 28 }, "hiddenSeries": false, - "id": 250, + "id": 251, "legend": { "alignAsTable": true, "avg": false, @@ -12602,15 +12602,24 @@ "format": "time_series", "interval": "", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": "99-{{type}}", "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tidb_tikvclient_load_region_cache_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type) / sum(rate(tidb_tikvclient_load_region_cache_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type)", 
+ "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "avg-{{type}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Load Region Duration 99", + "title": "Load Region Duration", "tooltip": { "shared": true, "sort": 0, From d1fb72458d0c1419e8f94a39aacfcaca65304cda Mon Sep 17 00:00:00 2001 From: you06 Date: Mon, 13 Jun 2022 17:35:07 +0800 Subject: [PATCH 3/4] change id Signed-off-by: you06 --- metrics/grafana/tidb.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index 80cc6a24aa651..a99667fbc66aa 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -6108,7 +6108,7 @@ "x": 12, "y": 98 }, - "id": 250, + "id": 249, "legend": { "alignAsTable": true, "avg": true, @@ -12475,7 +12475,7 @@ "x": 12, "y": 21 }, - "id": 249, + "id": 250, "legend": { "avg": false, "current": false, From 1d67b1a71050434ccee968b71a5c57da50478075 Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 14 Jun 2022 15:29:44 +0800 Subject: [PATCH 4/4] move to KV requests row Signed-off-by: you06 --- metrics/grafana/tidb.json | 1894 ++++++++++++++++++------------------- 1 file changed, 947 insertions(+), 947 deletions(-) diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index a99667fbc66aa..05f6b4c499721 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -8097,55 +8097,35 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "KV Request", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 147, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "pd command count by type", - "editable": true, - "error": false, + "description": "TiDB successful region cache operations count", "fill": 1, - "grid": {}, 
"gridPos": { "h": 7, "w": 8, - "x": 0, - "y": 10 + "x": 16, + "y": 15 }, - "id": 20, + "id": 164, "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -8156,24 +8136,23 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(pd_client_cmd_handle_cmds_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"tso\"}[1m])) by (type)", + "expr": "sum(rate(tidb_tikvclient_region_cache_operations_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result=\"ok\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", - "step": 10 + "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD Client CMD OPS", + "title": "Region Cache OK OPS", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -8212,25 +8191,20 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "pd client command durations by type within 99.9 percent buckets", - "editable": true, - "error": false, + "description": "TiDB error region cache operations count", "fill": 1, - "grid": {}, "gridPos": { "h": 7, "w": 8, - "x": 8, - "y": 10 + "x": 0, + "y": 22 }, - "id": 35, + "id": 250, "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, "show": true, "total": false, "values": false @@ -8238,7 +8212,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": 
false, "pointradius": 5, "points": false, @@ -8249,38 +8223,23 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.999, sum(rate(tidb_tikvclient_ts_future_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"tso|tso_async_wait\"}[1m])) by (le, type))", + "expr": "sum(rate(tidb_tikvclient_region_cache_operations_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result=\"err\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "999-{{type}}", + "legendFormat": "{{type}}-err", "refId": "A", - "step": 10 - }, - { - "expr": "histogram_quantile(0.99, sum(rate(tidb_tikvclient_ts_future_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"tso|tso_async_wait\"}[1m])) by (le, type))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99-{{type}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(tidb_tikvclient_ts_future_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"tso|tso_async_wait\"}[1m])) by (le, type))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90-{{type}}", - "refId": "C" + "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD Client CMD Duration", + "title": "Region Cache Error OPS", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -8292,7 +8251,7 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -8319,18 +8278,21 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "pd client command fail count by type", - "editable": true, - "error": false, + "description": "TiDB loading 
region cache durations", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, - "grid": {}, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, - "x": 16, - "y": 10 + "x": 8, + "y": 22 }, - "id": 43, + "hiddenSeries": false, + "id": 251, "legend": { "alignAsTable": true, "avg": false, @@ -8343,9 +8305,12 @@ "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, "pointradius": 5, "points": false, @@ -8356,24 +8321,33 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(pd_client_cmd_handle_failed_cmds_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tidb_tikvclient_load_region_cache_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "99-{{type}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "sum(rate(tidb_tikvclient_load_region_cache_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type) / sum(rate(tidb_tikvclient_load_region_cache_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type)", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 + "legendFormat": "avg-{{type}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD Client CMD Fail OPS", + "title": "Load Region Duration", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { 
@@ -8385,7 +8359,7 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -8405,14 +8379,29 @@ "align": false, "alignLevel": null } - }, + } + ], + "repeat": null, + "title": "KV Request", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 147, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of a client calling GetTSAsync until received the TS result.", + "description": "pd command count by type", "editable": true, "error": false, "fill": 1, @@ -8421,20 +8410,22 @@ "h": 7, "w": 8, "x": 0, - "y": 17 + "y": 10 }, - "id": 79, + "id": 20, "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], "nullPointMode": "null as zero", "percentage": false, @@ -8447,25 +8438,19 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(pd_client_cmd_handle_cmds_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "cmd", - "refId": "C" - }, - { - "expr": "sum(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m]))", + "expr": "sum(rate(pd_client_cmd_handle_cmds_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!=\"tso\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "request", - "refId": "A" + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD TSO 
OPS", + "title": "PD Client CMD OPS", "tooltip": { "msResolution": false, "shared": true, @@ -8482,7 +8467,7 @@ }, "yaxes": [ { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -8509,7 +8494,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of a client starting to wait for the TS until received the TS result.", + "description": "pd client command durations by type within 99.9 percent buckets", "editable": true, "error": false, "fill": 1, @@ -8518,14 +8503,16 @@ "h": 7, "w": 8, "x": 8, - "y": 17 + "y": 10 }, - "id": 77, + "id": 35, "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -8544,25 +8531,25 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.999, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"wait\"}[1m])) by (le))", + "expr": "histogram_quantile(0.999, sum(rate(tidb_tikvclient_ts_future_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"tso|tso_async_wait\"}[1m])) by (le, type))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "999", + "legendFormat": "999-{{type}}", "refId": "A", "step": 10 }, { - "expr": "histogram_quantile(0.99, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"wait\"}[1m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(tidb_tikvclient_ts_future_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"tso|tso_async_wait\"}[1m])) by (le, type))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "99", + "legendFormat": "99-{{type}}", "refId": "B" 
}, { - "expr": "histogram_quantile(0.90, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"wait\"}[1m])) by (le))", + "expr": "histogram_quantile(0.90, sum(rate(tidb_tikvclient_ts_future_wait_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"tso|tso_async_wait\"}[1m])) by (le, type))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "90", + "legendFormat": "90-{{type}}", "refId": "C" } ], @@ -8570,7 +8557,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD TSO Wait Duration", + "title": "PD Client CMD Duration", "tooltip": { "msResolution": false, "shared": true, @@ -8614,7 +8601,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of a client sending TSO request until received the response.", + "description": "pd client command fail count by type", "editable": true, "error": false, "fill": 1, @@ -8623,20 +8610,22 @@ "h": 7, "w": 8, "x": 16, - "y": 17 + "y": 10 }, - "id": 78, + "id": 43, "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], "nullPointMode": "null as zero", "percentage": false, @@ -8649,33 +8638,19 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.999, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m])) by (le))", + "expr": "sum(rate(pd_client_cmd_handle_failed_cmds_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "999", + "legendFormat": 
"{{type}}", "refId": "A", "step": 10 - }, - { - "expr": "histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.90, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", - "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD TSO RPC Duration", + "title": "PD Client CMD Fail OPS", "tooltip": { "msResolution": false, "shared": true, @@ -8692,11 +8667,11 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -8719,7 +8694,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of the async TS until called the Wait function.", + "description": "The duration of a client calling GetTSAsync until received the TS result.", "editable": true, "error": false, "fill": 1, @@ -8728,9 +8703,9 @@ "h": 7, "w": 8, "x": 0, - "y": 24 + "y": 17 }, - "id": 159, + "id": 79, "legend": { "avg": false, "current": false, @@ -8754,33 +8729,25 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.999, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso_async_wait\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "999", - "refId": "A", - "step": 10 - }, - { - "expr": "histogram_quantile(0.99, 
sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso_async_wait\"}[1m])) by (le))", + "expr": "sum(rate(pd_client_cmd_handle_cmds_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "99", - "refId": "B" + "legendFormat": "cmd", + "refId": "C" }, { - "expr": "histogram_quantile(0.90, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso_async_wait\"}[1m])) by (le))", + "expr": "sum(rate(pd_client_request_handle_requests_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "90", - "refId": "C" + "legendFormat": "request", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Async TSO Duration", + "title": "PD TSO OPS", "tooltip": { "msResolution": false, "shared": true, @@ -8797,7 +8764,7 @@ }, "yaxes": [ { - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -8824,25 +8791,23 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "It indicates if a request of PD client is forwarded by the PD follower", - "decimals": null, + "description": "The duration of a client starting to wait for the TS until received the TS result.", + "editable": true, + "error": false, "fill": 1, + "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 23 + "y": 17 }, - "id": 222, + "id": 77, "legend": { - "alignAsTable": false, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": 
false @@ -8850,9 +8815,9 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -8861,22 +8826,38 @@ "steppedLine": false, "targets": [ { - "expr": "pd_client_request_forwarded_status", + "expr": "histogram_quantile(0.999, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"wait\"}[1m])) by (le))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{delegate}}-{{host}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "999", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.99, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"wait\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.90, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"wait\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "90", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Request forwarded status", + "title": "PD TSO Wait Duration", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -8888,7 +8869,7 @@ }, "yaxes": [ { - "format": "none", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -8896,7 +8877,7 @@ "show": true }, { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -8915,31 +8896,23 @@ "dashLength": 10, "dashes": false, 
"datasource": "${DS_TEST-CLUSTER}", - "description": "The duration of a client sending one HTTP request to PD util received the response.", + "description": "The duration of a client sending TSO request until received the response.", "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, - "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 23 + "y": 17 }, - "hiddenSeries": false, - "id": 246, + "id": 78, "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, "show": true, "total": false, "values": false @@ -8948,11 +8921,7 @@ "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -8962,72 +8931,33 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "histogram_quantile(0.999, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m])) by (le))", "format": "time_series", - "interval": "", "intervalFactor": 2, - "legendFormat": "999-all", + "legendFormat": "999", "refId": "A", "step": 10 }, { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m])) by (le))", "format": 
"time_series", - "hide": false, - "interval": "", "intervalFactor": 2, - "legendFormat": "99-all", + "legendFormat": "99", "refId": "B" }, { - "exemplar": true, - "expr": "histogram_quantile(0.90, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "histogram_quantile(0.90, sum(rate(pd_client_request_handle_requests_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso\"}[1m])) by (le))", "format": "time_series", - "hide": false, - "interval": "", "intervalFactor": 2, - "legendFormat": "90-all", + "legendFormat": "90", "refId": "C" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.999, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "999-{{type}}", - "refId": "D", - "step": 10 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "99-{{type}}", - "refId": "E" - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.90, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "90-{{type}}", - "refId": "F" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD HTTP Request Duration", + "title": "PD TSO RPC 
Duration", "tooltip": { "msResolution": false, "shared": true, @@ -9071,44 +9001,32 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "PD HTTP API request count per second.", + "description": "The duration of the async TS until called the Wait function.", "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, - "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 30 + "y": 24 }, - "hiddenSeries": false, - "id": 247, + "id": 159, "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, "percentage": false, - "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -9118,32 +9036,33 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tidb_server_pd_api_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "histogram_quantile(0.999, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso_async_wait\"}[1m])) by (le))", "format": "time_series", - "interval": "", "intervalFactor": 2, - "legendFormat": "all", + "legendFormat": "999", "refId": "A", "step": 10 }, { - "exemplar": true, - "expr": "sum(rate(tidb_server_pd_api_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "histogram_quantile(0.99, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso_async_wait\"}[1m])) by (le))", "format": "time_series", - "hide": 
false, - "interval": "", "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "B", - "step": 10 + "legendFormat": "99", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.90, sum(rate(pd_client_cmd_handle_cmds_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"tso_async_wait\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "90", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD HTTP Request OPS", + "title": "Async TSO Duration", "tooltip": { "msResolution": false, "shared": true, @@ -9160,7 +9079,7 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -9187,45 +9106,35 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "PD failed HTTP request count per second.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "description": "It indicates if a request of PD client is forwarded by the PD follower", + "decimals": null, "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 30 + "y": 23 }, - "hiddenSeries": false, - "id": 248, + "id": 222, "legend": { - "alignAsTable": true, + "alignAsTable": false, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, + "nullPointMode": "null", "percentage": false, - "pluginVersion": "7.5.10", - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -9234,37 +9143,22 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": 
"sum(rate(tidb_server_pd_api_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result!~\"200.*\"}[1m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "all", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "sum(rate(tidb_server_pd_api_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result!~\"200.*\"}[1m])) by (type, result)", + "expr": "pd_client_request_forwarded_status", "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}} - {{result}}", - "refId": "B", - "step": 10 + "intervalFactor": 1, + "legendFormat": "{{delegate}}-{{host}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "PD HTTP Request Fail OPS", + "title": "Request forwarded status", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -9276,15 +9170,15 @@ }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -9296,40 +9190,31 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "PD Client", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 148, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB loading schema time durations by instance", + "description": "The duration of a client sending one HTTP request to PD util received the response.", "editable": true, "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, 
"grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 47 + "w": 8, + "x": 16, + "y": 23 }, - "id": 27, + "hiddenSeries": false, + "id": 246, "legend": { "alignAsTable": true, "avg": false, @@ -9345,7 +9230,11 @@ "linewidth": 1, "links": [], "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -9355,25 +9244,77 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tidb_domain_load_schema_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "", + "legendFormat": "999-all", "refId": "A", "step": 10 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99-all", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90-all", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "999-{{type}}", + "refId": "D", + "step": 10 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "99-{{type}}", + "refId": "E" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.90, sum(rate(tidb_server_pd_api_execution_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "90-{{type}}", + "refId": "F" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Load Schema Duration", + "title": "PD HTTP Request Duration", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -9389,7 +9330,7 @@ "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -9412,18 +9353,24 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB loading schema times including both failed and successful ones", + "description": "PD HTTP API request count per second.", "editable": true, "error": false, - "fill": 0, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 47 + "w": 8, + "x": 0, + "y": 30 }, - "id": 28, + "hiddenSeries": false, + "id": 247, "legend": { "alignAsTable": true, "avg": false, @@ -9436,43 +9383,54 @@ "values": false }, "lines": true, - "linewidth": 
1, + "linewidth": 2, "links": [], "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*failed/", - "bars": true - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_domain_load_schema_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance,type)", + "exemplar": true, + "expr": "sum(rate(tidb_server_pd_api_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}-{{type}}", - "metric": "tidb_domain_load_schema_duration_count", + "legendFormat": "all", "refId": "A", "step": 10 + }, + { + "exemplar": true, + "expr": "sum(rate(tidb_server_pd_api_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "B", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Load Schema OPS", + "title": "PD HTTP Request OPS", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -9486,7 +9444,7 @@ { "format": "short", "label": null, - "logBase": 10, + "logBase": 1, "max": null, "min": null, "show": true @@ -9511,25 +9469,28 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "decimals": 2, - "description": "TiDB schema lease error counts", + "description": "PD failed HTTP request count per second.", "editable": true, "error": false, - "fill": 0, + "fieldConfig": { + "defaults": {}, + 
"overrides": [] + }, + "fill": 1, + "fillGradient": 0, "grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 54 + "w": 8, + "x": 8, + "y": 30 }, - "id": 29, + "hiddenSeries": false, + "id": 248, "legend": { "alignAsTable": true, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -9538,10 +9499,14 @@ "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.10", "pointradius": 5, "points": false, "renderer": "flot", @@ -9551,25 +9516,37 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(tidb_session_schema_lease_error_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", + "exemplar": true, + "expr": "sum(rate(tidb_server_pd_api_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result!~\"200.*\"}[1m]))", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "tidb_server_", + "legendFormat": "all", "refId": "A", "step": 10 + }, + { + "exemplar": true, + "expr": "sum(rate(tidb_server_pd_api_request_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result!~\"200.*\"}[1m])) by (type, result)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{type}} - {{result}}", + "refId": "B", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Schema Lease Error OPM", + "title": "PD HTTP Request Fail OPS", "tooltip": { "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -9601,25 +9578,40 @@ "align": false, "alignLevel": null } - }, + } + ], + 
"repeat": null, + "title": "PD Client", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 148, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB load privilege counts", + "description": "TiDB loading schema time durations by instance", "editable": true, "error": false, - "fill": 0, + "fill": 1, "grid": {}, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 54 + "x": 0, + "y": 47 }, - "id": 157, + "id": 27, "legend": { "alignAsTable": true, "avg": false, @@ -9639,22 +9631,17 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*failed/", - "bars": true - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_domain_load_privilege_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance,type)", + "expr": "histogram_quantile(0.99, sum(rate(tidb_domain_load_schema_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}-{{type}}", - "metric": "tidb_domain_load_schema_duration_count", + "legendFormat": "{{instance}}", + "metric": "", "refId": "A", "step": 10 } @@ -9663,7 +9650,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Load Privilege OPS", + "title": "Load Schema Duration", "tooltip": { "msResolution": false, "shared": true, @@ -9680,11 +9667,11 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, - "logBase": 10, + "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -9700,40 +9687,25 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "Schema Load", - "type": "row" - }, - { - "collapsed": 
true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "id": 149, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB DDL duration statistics", + "description": "TiDB loading schema times including both failed and successful ones", "editable": true, "error": false, - "fill": 1, + "fill": 0, "grid": {}, "gridPos": { "h": 7, - "w": 8, - "x": 0, - "y": 12 + "w": 12, + "x": 12, + "y": 47 }, - "id": 9, + "id": 28, "legend": { "alignAsTable": true, "avg": false, @@ -9746,45 +9718,43 @@ "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*failed/", + "bars": true + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tidb_ddl_handle_job_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "expr": "sum(rate(tidb_domain_load_schema_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance,type)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}-{{type}}", + "metric": "tidb_domain_load_schema_duration_count", "refId": "A", "step": 10 - }, - { - "expr": "histogram_quantile(0.95, sum(rate(tidb_ddl_batch_add_idx_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "add index worker", - "refId": "B", - "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DDL Duration 95", + "title": "Load Schema OPS", "tooltip": 
{ "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -9796,11 +9766,11 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, - "logBase": 1, + "logBase": 10, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -9823,32 +9793,36 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Some DDLs need to backfill data, for example, adding indexes, column type changes, etc. This metrics shows the number of rows backfilled per second.", - "fill": 1, + "decimals": 2, + "description": "TiDB schema lease error counts", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 12 + "w": 12, + "x": 0, + "y": 54 }, - "id": 63, + "id": 29, "legend": { "alignAsTable": true, "avg": false, - "current": true, + "current": false, "hideEmpty": true, "hideZero": true, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, @@ -9859,19 +9833,22 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_ddl_add_index_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "sum(increase(tidb_session_schema_lease_error_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A" + "legendFormat": "{{instance}}", + "metric": "tidb_server_", + "refId": "A", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DDL Backfill Data Rate", + "title": "Schema Lease Error OPM", "tooltip": { + 
"msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -9886,7 +9863,7 @@ }, "yaxes": [ { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -9913,21 +9890,22 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB ddl request in queue", - "fill": 1, + "description": "TiDB load privilege counts", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 12 + "w": 12, + "x": 12, + "y": 54 }, - "id": 62, + "id": 157, "legend": { "alignAsTable": true, "avg": false, "current": false, - "hideEmpty": true, - "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -9938,30 +9916,38 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*failed/", + "bars": true + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "tidb_ddl_waiting_jobs{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", + "expr": "sum(rate(tidb_domain_load_privilege_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance,type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}-{{type}}", - "refId": "A" + "metric": "tidb_domain_load_schema_duration_count", + "refId": "A", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DDL Waiting Jobs Count", + "title": "Load Privilege OPS", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -9976,11 +9962,11 @@ }, "yaxes": [ { - "format": "none", + "format": "short", "label": null, - "logBase": 1, + "logBase": 10, "max": null, - "min": "0", + "min": null, 
"show": true }, { @@ -9996,22 +9982,40 @@ "align": false, "alignLevel": null } - }, + } + ], + "repeat": null, + "title": "Schema Load", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 149, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB different ddl worker numbers", + "description": "TiDB DDL duration statistics", + "editable": true, + "error": false, "fill": 1, + "grid": {}, "gridPos": { "h": 7, - "w": 12, + "w": 8, "x": 0, - "y": 19 + "y": 12 }, - "id": 55, + "id": 9, "legend": { "alignAsTable": true, "avg": false, @@ -10024,9 +10028,9 @@ "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, @@ -10037,22 +10041,32 @@ "steppedLine": false, "targets": [ { - "expr": "increase(tidb_ddl_worker_operation_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", + "expr": "histogram_quantile(0.95, sum(rate(tidb_ddl_handle_job_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}-{{type}}", - "refId": "A" + "legendFormat": "{{type}}", + "refId": "A", + "step": 10 + }, + { + "expr": "histogram_quantile(0.95, sum(rate(tidb_ddl_batch_add_idx_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "add index worker", + "refId": "B", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DDL META OPM", + "title": "DDL Duration 95", "tooltip": { + "msResolution": false, "shared": 
true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -10064,11 +10078,11 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -10091,27 +10105,27 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB worker duration by type, action, results", + "description": "Some DDLs need to backfill data, for example, adding indexes, column type changes, etc. This metrics shows the number of rows backfilled per second.", "fill": 1, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 19 + "w": 8, + "x": 8, + "y": 12 }, - "id": 56, + "id": 63, "legend": { "alignAsTable": true, "avg": false, - "current": false, + "current": true, "hideEmpty": true, "hideZero": true, - "max": false, + "max": true, "min": false, "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -10127,10 +10141,10 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(increase(tidb_ddl_worker_operation_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, action, result))", + "expr": "sum(rate(tidb_ddl_add_index_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}-{{action}}-{{result}}", + "legendFormat": "{{type}}", "refId": "A" } ], @@ -10138,7 +10152,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DDL Worker Duration 99", + "title": "DDL Backfill Data Rate", "tooltip": { "shared": true, "sort": 0, @@ -10154,7 +10168,7 @@ }, "yaxes": [ { - "format": "s", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -10181,19 +10195,21 @@ "dashLength": 10, "dashes": false, "datasource": 
"${DS_TEST-CLUSTER}", - "description": "TiDB ddl schema syncer statistics, including init, start, watch, clear function call time cost", + "description": "TiDB ddl request in queue", "fill": 1, "gridPos": { "h": 7, "w": 8, - "x": 0, - "y": 26 + "x": 16, + "y": 12 }, - "id": 64, + "id": 62, "legend": { "alignAsTable": true, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -10215,10 +10231,10 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tidb_ddl_deploy_syncer_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[2m])) by (le, type, result))", + "expr": "tidb_ddl_waiting_jobs{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}-{{result}}", + "legendFormat": "{{instance}}-{{type}}", "refId": "A" } ], @@ -10226,7 +10242,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Deploy Syncer Duration", + "title": "DDL Waiting Jobs Count", "tooltip": { "shared": true, "sort": 0, @@ -10242,11 +10258,11 @@ }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -10269,15 +10285,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB ddl owner time operations on etcd duration statistics ", + "description": "TiDB different ddl worker numbers", "fill": 1, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 26 + "w": 12, + "x": 0, + "y": 19 }, - "id": 65, + "id": 55, "legend": { "alignAsTable": true, "avg": false, @@ -10303,10 +10319,10 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tidb_ddl_owner_handle_syncer_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[2m])) by (le, type, result))", + "expr": "increase(tidb_ddl_worker_operation_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}-{{result}}", + "legendFormat": "{{instance}}-{{type}}", "refId": "A" } ], @@ -10314,7 +10330,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Owner Handle Syncer Duration", + "title": "DDL META OPM", "tooltip": { "shared": true, "sort": 0, @@ -10330,7 +10346,7 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -10357,19 +10373,21 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB schema syncer version update time duration", + "description": "TiDB worker duration by type, action, results", "fill": 1, "gridPos": { "h": 7, - "w": 8, - "x": 16, - "y": 26 + "w": 12, + "x": 12, + "y": 19 }, - "id": 66, + "id": 56, "legend": { "alignAsTable": true, "avg": false, "current": false, + "hideEmpty": true, + "hideZero": true, "max": false, "min": false, "rightSide": true, @@ -10391,10 +10409,10 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tidb_ddl_update_self_ver_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[2m])) by (le, result))", + "expr": "histogram_quantile(0.99, sum(increase(tidb_ddl_worker_operation_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, action, result))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{result}}", + "legendFormat": "{{type}}-{{action}}-{{result}}", "refId": "A" } ], @@ -10402,7 +10420,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Update Self Version Duration", + "title": "DDL Worker Duration 99", "tooltip": { "shared": true, 
"sort": 0, @@ -10422,7 +10440,7 @@ "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -10445,20 +10463,22 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "executed DDL jobs per minute", + "description": "TiDB ddl schema syncer statistics, including init, start, watch, clear function call time cost", "fill": 1, "gridPos": { - "h": 8, - "w": 12, + "h": 7, + "w": 8, "x": 0, - "y": 33 + "y": 26 }, - "id": 190, + "id": 64, "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -10468,7 +10488,7 @@ "links": [], "nullPointMode": "null", "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -10477,25 +10497,18 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_ddl_handle_job_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "histogram_quantile(1, sum(rate(tidb_ddl_deploy_syncer_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[2m])) by (le, type, result))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ type }}", + "intervalFactor": 2, + "legendFormat": "{{type}}-{{result}}", "refId": "A" - }, - { - "expr": "sum(rate(tidb_ddl_handle_job_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "total", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DDL OPM", + "title": "Deploy Syncer Duration", "tooltip": { "shared": true, "sort": 0, @@ -10538,15 +10551,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": 
"TiDB DDL backfill progress in percentage. The value is [0,100]", + "description": "TiDB ddl owner time operations on etcd duration statistics ", "fill": 1, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 33 + "h": 7, + "w": 8, + "x": 8, + "y": 26 }, - "id": 192, + "id": 65, "legend": { "alignAsTable": true, "avg": false, @@ -10562,9 +10575,8 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": {}, "percentage": false, - "pointradius": 2, + "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -10573,25 +10585,18 @@ "steppedLine": false, "targets": [ { - "expr": "tidb_ddl_backfill_percentage_progress{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"add_index\"}", + "expr": "histogram_quantile(1, sum(rate(tidb_ddl_owner_handle_syncer_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[2m])) by (le, type, result))", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{type}}", + "intervalFactor": 2, + "legendFormat": "{{type}}-{{result}}", "refId": "A" - }, - { - "expr": "tidb_ddl_backfill_percentage_progress{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"modify_column\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{type}}", - "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "DDL backfill progress in percentage", + "title": "Owner Handle Syncer Duration", "tooltip": { "shared": true, "sort": 0, @@ -10607,11 +10612,11 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -10627,42 +10632,29 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "DDL", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - 
"x": 0, - "y": 12 - }, - "id": 150, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB auto analyze time durations within 95 percent histogram buckets", + "description": "TiDB schema syncer version update time duration", "fill": 1, "gridPos": { "h": 7, "w": 8, - "x": 0, - "y": 149 + "x": 16, + "y": 26 }, - "id": 46, + "id": 66, "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -10681,19 +10673,18 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tidb_statistics_auto_analyze_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "histogram_quantile(1, sum(rate(tidb_ddl_update_self_ver_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[2m])) by (le, result))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "auto analyze duration", - "refId": "A", - "step": 30 + "legendFormat": "{{result}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Auto Analyze Duration 95", + "title": "Update Self Version Duration", "tooltip": { "shared": true, "sort": 0, @@ -10709,7 +10700,7 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -10736,15 +10727,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB auto analyze query per second", + "description": "executed DDL jobs per minute", "fill": 1, "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 149 + "h": 8, + "w": 12, + "x": 0, + "y": 33 }, - "id": 47, + "id": 190, "legend": { "avg": false, "current": false, @@ -10759,7 +10750,7 @@ "links": [], "nullPointMode": "null", "percentage": 
false, - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -10768,19 +10759,25 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_statistics_auto_analyze_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "sum(rate(tidb_ddl_handle_job_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 30 + "intervalFactor": 1, + "legendFormat": "{{ type }}", + "refId": "A" + }, + { + "expr": "sum(rate(tidb_ddl_handle_job_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "total", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Auto Analyze QPS", + "title": "DDL OPM", "tooltip": { "shared": true, "sort": 0, @@ -10823,20 +10820,22 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB statistics inaccurate rate", + "description": "TiDB DDL backfill progress in percentage. 
The value is [0,100]", "fill": 1, "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 149 + "h": 8, + "w": 12, + "x": 12, + "y": 33 }, - "id": 70, + "id": 192, "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -10845,8 +10844,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": {}, "percentage": false, - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -10855,33 +10855,25 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tidb_statistics_stats_inaccuracy_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "tidb_ddl_backfill_percentage_progress{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"add_index\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99", - "refId": "A", - "step": 30 + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", + "refId": "A" }, { - "expr": "histogram_quantile(0.90, sum(rate(tidb_statistics_stats_inaccuracy_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "tidb_ddl_backfill_percentage_progress{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=\"modify_column\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "90", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{type}}", "refId": "B" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(tidb_statistics_stats_inaccuracy_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "50", - "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], 
"timeShift": null, - "title": "Stats Inaccuracy Rate", + "title": "DDL backfill progress in percentage", "tooltip": { "shared": true, "sort": 0, @@ -10901,7 +10893,7 @@ "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -10917,22 +10909,37 @@ "align": false, "alignLevel": null } - }, + } + ], + "repeat": null, + "title": "DDL", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 150, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB optimizer using pseudo estimation counts", + "description": "TiDB auto analyze time durations within 95 percent histogram buckets", "fill": 1, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 156 + "y": 149 }, - "id": 71, + "id": 46, "legend": { "avg": false, "current": false, @@ -10956,10 +10963,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_statistics_pseudo_estimation_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (type)", + "expr": "histogram_quantile(0.95, sum(rate(tidb_statistics_auto_analyze_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": "auto analyze duration", "refId": "A", "step": 30 } @@ -10968,7 +10975,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Pseudo Estimation OPS", + "title": "Auto Analyze Duration 95", "tooltip": { "shared": true, "sort": 0, @@ -11011,15 +11018,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB dumping statistics back to kv storage times", + "description": "TiDB auto analyze query per second", "fill": 1, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 156 + "y": 149 }, - "id": 92, + "id": 47, 
"legend": { "avg": false, "current": false, @@ -11043,7 +11050,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_statistics_dump_feedback_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "sum(rate(tidb_statistics_auto_analyze_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -11055,7 +11062,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Dump Feedback OPS", + "title": "Auto Analyze QPS", "tooltip": { "shared": true, "sort": 0, @@ -11098,15 +11105,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB store quering feedback counts", + "description": "TiDB statistics inaccurate rate", "fill": 1, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 156 + "y": 149 }, - "id": 170, + "id": 70, "legend": { "avg": false, "current": false, @@ -11130,19 +11137,33 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_statistics_store_query_feedback_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) ", + "expr": "histogram_quantile(0.99, sum(rate(tidb_statistics_stats_inaccuracy_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": "99", "refId": "A", "step": 30 + }, + { + "expr": "histogram_quantile(0.90, sum(rate(tidb_statistics_stats_inaccuracy_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "90", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.50, 
sum(rate(tidb_statistics_stats_inaccuracy_rate_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Store Query Feedback QPS", + "title": "Stats Inaccuracy Rate", "tooltip": { "shared": true, "sort": 0, @@ -11185,15 +11206,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Counter of query feedback whose actual count is much different than calculated by current statistics", + "description": "TiDB optimizer using pseudo estimation counts", "fill": 1, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 163 + "y": 156 }, - "id": 113, + "id": 71, "legend": { "avg": false, "current": false, @@ -11217,19 +11238,19 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_statistics_high_error_rate_feedback_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(rate(tidb_statistics_pseudo_estimation_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) by (type)", "format": "time_series", - "hide": false, "intervalFactor": 2, - "legendFormat": "Significant Feedback", - "refId": "A" + "legendFormat": "{{type}}", + "refId": "A", + "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Significant Feedback", + "title": "Pseudo Estimation OPS", "tooltip": { "shared": true, "sort": 0, @@ -11272,15 +11293,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB updating statistics using feed back counts", + "description": "TiDB dumping statistics back to kv storage times", "fill": 1, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 163 + "y": 156 }, - "id": 93, + "id": 92, "legend": { "avg": false, "current": 
false, @@ -11304,7 +11325,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_statistics_update_stats_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "sum(rate(tidb_statistics_dump_feedback_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", @@ -11316,7 +11337,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Update Stats OPS", + "title": "Dump Feedback OPS", "tooltip": { "shared": true, "sort": 0, @@ -11359,35 +11380,28 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB fast analyze statistics ", - "editable": true, - "error": false, + "description": "TiDB store quering feedback counts", "fill": 1, - "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 163 + "y": 156 }, - "id": 173, + "id": 170, "legend": { - "alignAsTable": true, "avg": false, "current": false, - "max": true, + "max": false, "min": false, - "rightSide": true, "show": true, - "sort": "avg", - "sortDesc": true, "total": false, - "values": true + "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -11398,24 +11412,23 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(1, sum(rate(tidb_statistics_fast_analyze_status_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "expr": "sum(rate(tidb_statistics_store_query_feedback_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type) ", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", - "step": 40 + "step": 30 } ], "thresholds": [], "timeFrom": 
null, "timeRegions": [], "timeShift": null, - "title": "Fast Analyze Status 100", + "title": "Store Query Feedback QPS", "tooltip": { - "msResolution": false, "shared": true, - "sort": 2, - "value_type": "cumulative" + "sort": 0, + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -11427,7 +11440,7 @@ }, "yaxes": [ { - "format": "none", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -11440,7 +11453,7 @@ "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { @@ -11454,21 +11467,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "description": "Counter of query feedback whose actual count is much different than calculated by current statistics", "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 170 + "y": 163 }, - "hiddenSeries": false, - "id": 229, + "id": 113, "legend": { "avg": false, "current": false, @@ -11482,9 +11489,6 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, "pointradius": 5, "points": false, @@ -11495,32 +11499,19 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tidb_statistics_sync_load_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "sync-load", - "refId": "A", - "step": 30 - }, - { - "exemplar": true, - "expr": "sum(rate(tidb_statistics_sync_load_timeout_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "sum(rate(tidb_statistics_high_error_rate_feedback_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", "hide": false, - "interval": "", "intervalFactor": 2, - 
"legendFormat": "timeout", - "refId": "B", - "step": 30 + "legendFormat": "Significant Feedback", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Sync Load QPS", + "title": "Significant Feedback", "tooltip": { "shared": true, "sort": 0, @@ -11563,21 +11554,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "description": "TiDB updating statistics using feed back counts", "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 170 - }, - "hiddenSeries": false, - "id": 230, + "y": 163 + }, + "id": 93, "legend": { "avg": false, "current": false, @@ -11591,9 +11576,6 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, "pointradius": 5, "points": false, @@ -11604,32 +11586,19 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tidb_statistics_sync_load_latency_millis_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "sum(rate(tidb_statistics_update_stats_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", - "interval": "", "intervalFactor": 2, - "legendFormat": "sync-load", + "legendFormat": "{{type}}", "refId": "A", "step": 30 - }, - { - "exemplar": true, - "expr": "histogram_quantile(0.95, sum(rate(tidb_statistics_read_stats_latency_millis_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "read-stats", - "refId": "B", - "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Sync Load Latency 95 (ms)", + 
"title": "Update Stats OPS", "tooltip": { "shared": true, "sort": 0, @@ -11672,37 +11641,35 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB table stats healthy distribution", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, + "description": "TiDB fast analyze statistics ", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 170 + "y": 163 }, - "hiddenSeries": false, - "id": 233, + "id": 173, "legend": { + "alignAsTable": true, "avg": false, "current": false, - "max": false, + "max": true, "min": false, + "rightSide": true, "show": true, + "sort": "avg", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, @@ -11713,25 +11680,24 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "avg(tidb_statistics_stats_healthy{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", + "expr": "histogram_quantile(1, sum(rate(tidb_statistics_fast_analyze_status_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", "format": "time_series", - "interval": "", "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "A", - "step": 30 + "step": 40 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Stats Healthy Distribution", + "title": "Fast Analyze Status 100", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, - "value_type": "individual" + "sort": 2, + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -11743,7 +11709,7 @@ }, "yaxes": [ { - "format": "short", + "format": "none", "label": null, 
"logBase": 1, "max": null, @@ -11756,7 +11722,7 @@ "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ], "yaxis": { @@ -11770,47 +11736,38 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB managing stats cache by lru", - "editable": true, - "error": false, + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 12, + "w": 8, "x": 0, - "y": 177 + "y": 170 }, "hiddenSeries": false, - "id": 234, + "id": 229, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": false, - "max": true, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, - "sideWidth": null, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -11821,32 +11778,35 @@ "targets": [ { "exemplar": true, - "expr": "tidb_statistics_stats_cache_lru_val{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"track\"}", + "expr": "sum(rate(tidb_statistics_sync_load_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "interval": "", - "intervalFactor": 1, - "legendFormat": "track-{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "sync-load", + "refId": "A", + "step": 30 }, { "exemplar": true, - "expr": "tidb_statistics_stats_cache_lru_val{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"capacity\"}", - "hide": true, + "expr": "sum(rate(tidb_statistics_sync_load_timeout_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + 
"format": "time_series", + "hide": false, "interval": "", - "legendFormat": "capacity--{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "timeout", + "refId": "B", + "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Stats Cache LRU Cost", + "title": "Sync Load QPS", "tooltip": { - "msResolution": true, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -11858,20 +11818,20 @@ }, "yaxes": [ { - "format": "bytes", - "label": "", + "format": "short", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { "format": "short", - "label": "", + "label": null, "logBase": 1, "max": null, "min": null, - "show": false + "show": true } ], "yaxis": { @@ -11885,7 +11845,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB managing stats cache by lru", + "description": "", "fieldConfig": { "defaults": {}, "overrides": [] @@ -11894,22 +11854,20 @@ "fillGradient": 0, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 177 + "w": 8, + "x": 8, + "y": 170 }, "hiddenSeries": false, - "id": 235, + "id": 230, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, + "avg": false, + "current": false, + "max": false, "min": false, - "rightSide": true, "show": true, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -11919,7 +11877,6 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -11930,20 +11887,31 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tidb_statistics_stats_cache_lru_op{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", + "expr": "histogram_quantile(0.95, sum(rate(tidb_statistics_sync_load_latency_millis_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", "interval": "", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": "sync-load", "refId": "A", "step": 30 + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tidb_statistics_read_stats_latency_millis_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "read-stats", + "refId": "B", + "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Stats Cache LRU OPS", + "title": "Sync Load Latency 95 (ms)", "tooltip": { "shared": true, "sort": 0, @@ -11959,7 +11927,6 @@ }, "yaxes": [ { - "$$hashKey": "object:90", "format": "short", "label": null, "logBase": 1, @@ -11968,7 +11935,6 @@ "show": true }, { - "$$hashKey": "object:91", "format": "short", "label": null, "logBase": 1, @@ -11981,55 +11947,44 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "Statistics", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 13 - }, - "id": 161, - "panels": [ + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB new session durations for new etcd sessions", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, + "description": "TiDB table stats healthy distribution", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, - "x": 0, - "y": 150 + "x": 16, + "y": 170 }, - "id": 162, + "hiddenSeries": false, + "id": 233, "legend": { - "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 2, + "linewidth": 1, "links": [], - 
"nullPointMode": "null as zero", + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, "pointradius": 5, "points": false, @@ -12040,24 +11995,25 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, sum(rate(tidb_owner_new_session_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance, result))", + "exemplar": true, + "expr": "avg(tidb_statistics_stats_healthy{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (type)", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "{{instance}}-{{result}}", + "legendFormat": "{{type}}", "refId": "A", - "step": 10 + "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "New ETCD Session Duration 95", + "title": "Stats Healthy Distribution", "tooltip": { - "msResolution": false, "shared": true, "sort": 0, - "value_type": "cumulative" + "value_type": "individual" }, "type": "graph", "xaxis": { @@ -12069,11 +12025,11 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -12096,29 +12052,47 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB owner watcher counts", - "fill": 1, + "description": "TiDB managing stats cache by lru", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, "gridPos": { "h": 7, - "w": 8, - "x": 8, - "y": 150 + "w": 12, + "x": 0, + "y": 177 }, - "id": 163, + "hiddenSeries": false, + "id": 234, "legend": { + "alignAsTable": true, "avg": false, - "current": false, - "max": false, + "current": true, + "hideEmpty": true, + "hideZero": false, + "max": true, "min": false, + "rightSide": true, "show": true, + "sideWidth": null, 
"total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -12128,23 +12102,33 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_owner_watch_owner_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type, result, instance)", + "exemplar": true, + "expr": "tidb_statistics_stats_cache_lru_val{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"track\"}", "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{type}}-{{result}}-{{instance}}", - "refId": "A", - "step": 30 + "interval": "", + "intervalFactor": 1, + "legendFormat": "track-{{instance}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "tidb_statistics_stats_cache_lru_val{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", type=\"capacity\"}", + "hide": true, + "interval": "", + "legendFormat": "capacity--{{instance}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Owner Watcher OPS", + "title": "Stats Cache LRU Cost", "tooltip": { + "msResolution": true, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -12156,72 +12140,68 @@ }, "yaxes": [ { - "format": "short", - "label": null, + "format": "bytes", + "label": "", "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { "format": "short", - "label": null, + "label": "", "logBase": 1, "max": null, "min": null, - "show": true + "show": false } ], "yaxis": { "align": false, "alignLevel": null } - } - ], - "title": "Owner", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 14 - }, - "id": 151, - "panels": [ + 
}, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB auto id requests per second including single table/global auto id processing and single table auto id rebase processing", + "description": "TiDB managing stats cache by lru", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 14 + "x": 12, + "y": 177 }, - "id": 50, + "hiddenSeries": false, + "id": 235, "legend": { "alignAsTable": true, - "avg": false, - "current": false, - "max": false, + "avg": true, + "current": true, + "max": true, "min": false, "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, + "pluginVersion": "7.5.11", "pointradius": 5, "points": false, "renderer": "flot", @@ -12231,18 +12211,21 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_autoid_operation_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "exemplar": true, + "expr": "sum(rate(tidb_statistics_stats_cache_lru_op{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[1m])) by (type)", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "AutoID QPS", - "refId": "A" + "legendFormat": "{{type}}", + "refId": "A", + "step": 30 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "AutoID QPS", + "title": "Stats Cache LRU OPS", "tooltip": { "shared": true, "sort": 0, @@ -12258,6 +12241,7 @@ }, "yaxes": [ { + "$$hashKey": "object:90", "format": "short", "label": null, "logBase": 1, @@ -12266,6 +12250,7 @@ "show": true }, { + "$$hashKey": "object:91", "format": "short", "label": null, "logBase": 1, @@ -12278,37 +12263,55 @@ "align": false, "alignLevel": 
null } - }, + } + ], + "repeat": null, + "title": "Statistics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 161, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB auto id requests durations", + "description": "TiDB new session durations for new etcd sessions", + "editable": true, + "error": false, "fill": 1, + "grid": {}, "gridPos": { "h": 7, - "w": 12, - "x": 12, - "y": 14 + "w": 8, + "x": 0, + "y": 150 }, - "id": 51, + "id": 162, "legend": { "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, - "rightSide": true, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, - "linewidth": 1, + "linewidth": 2, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "percentage": false, "pointradius": 5, "points": false, @@ -12319,29 +12322,24 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tidb_autoid_operation_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "99-{{type}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.80, sum(rate(tidb_autoid_operation_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "expr": "histogram_quantile(0.95, sum(rate(tidb_owner_new_session_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance, result))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "80-{{type}}", - "refId": "C" + "legendFormat": "{{instance}}-{{result}}", + "refId": "A", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": 
null, - "title": "AutoID Duration", + "title": "New ETCD Session Duration 95", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, - "value_type": "individual" + "value_type": "cumulative" }, "type": "graph", "xaxis": { @@ -12355,9 +12353,9 @@ { "format": "s", "label": null, - "logBase": 2, + "logBase": 1, "max": null, - "min": "0.001", + "min": "0", "show": true }, { @@ -12380,15 +12378,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB successful region cache operations count", + "description": "TiDB owner watcher counts", "fill": 1, "gridPos": { "h": 7, - "w": 12, - "x": 0, - "y": 21 + "w": 8, + "x": 8, + "y": 150 }, - "id": 164, + "id": 163, "legend": { "avg": false, "current": false, @@ -12412,10 +12410,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_tikvclient_region_cache_operations_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result=\"ok\"}[1m])) by (type)", + "expr": "sum(rate(tidb_owner_watch_owner_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type, result, instance)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": "{{type}}-{{result}}-{{instance}}", "refId": "A", "step": 30 } @@ -12424,7 +12422,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Region Cache OK OPS", + "title": "Owner Watcher OPS", "tooltip": { "shared": true, "sort": 0, @@ -12460,27 +12458,43 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "Owner", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 151, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB error region cache operations count", + "description": "TiDB auto id requests per second including single 
table/global auto id processing and single table auto id rebase processing", "fill": 1, "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 21 + "x": 0, + "y": 14 }, - "id": 250, + "id": 50, "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": true, "show": true, "total": false, "values": false @@ -12499,19 +12513,18 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tidb_tikvclient_region_cache_operations_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", result=\"err\"}[1m])) by (type)", + "expr": "sum(rate(tidb_autoid_operation_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 30 + "legendFormat": "AutoID QPS", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Region Cache Error OPS", + "title": "AutoID QPS", "tooltip": { "shared": true, "sort": 0, @@ -12554,21 +12567,15 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiDB loading region cache durations", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "description": "TiDB auto id requests durations", "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, "w": 12, - "x": 0, - "y": 28 + "x": 12, + "y": 14 }, - "hiddenSeries": false, - "id": 251, + "id": 51, "legend": { "alignAsTable": true, "avg": false, @@ -12584,9 +12591,6 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "options": { - "alertThreshold": true - }, "percentage": false, "pointradius": 5, "points": false, @@ -12597,29 +12601,25 @@ "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(tidb_tikvclient_load_region_cache_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", 
instance=~\"$instance\"}[1m])) by (le, type))", + "expr": "histogram_quantile(0.99, sum(rate(tidb_autoid_operation_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", "format": "time_series", - "interval": "", "intervalFactor": 2, "legendFormat": "99-{{type}}", - "refId": "A" + "refId": "B" }, { - "exemplar": true, - "expr": "sum(rate(tidb_tikvclient_load_region_cache_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type) / sum(rate(tidb_tikvclient_load_region_cache_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type)", + "expr": "histogram_quantile(0.80, sum(rate(tidb_autoid_operation_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", "format": "time_series", - "interval": "", "intervalFactor": 2, - "legendFormat": "avg-{{type}}", - "refId": "B" + "legendFormat": "80-{{type}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Load Region Duration", + "title": "AutoID Duration", "tooltip": { "shared": true, "sort": 0, @@ -12637,9 +12637,9 @@ { "format": "s", "label": null, - "logBase": 1, + "logBase": 2, "max": null, - "min": "0", + "min": "0.001", "show": true }, { @@ -12667,8 +12667,8 @@ "gridPos": { "h": 7, "w": 12, - "x": 12, - "y": 28 + "x": 0, + "y": 42 }, "id": 52, "legend": {