From a64d76bbc3b829653f610d6d9e4a2feeeba64fa4 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Sat, 21 Sep 2024 11:47:30 +0800 Subject: [PATCH] Storages: Add Block pop-up latency metrics (#9260) (#9418) ref pingcap/tiflash#9417 Co-authored-by: jinhelin Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> Co-authored-by: JaySon --- dbms/src/Common/TiFlashMetrics.h | 6 +- .../DeltaMerge/ReadThread/WorkQueue.h | 21 ++- metrics/grafana/tiflash_summary.json | 128 ++++++++++++++++++ 3 files changed, 150 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/TiFlashMetrics.h b/dbms/src/Common/TiFlashMetrics.h index 7496107d889..b7e93bea214 100644 --- a/dbms/src/Common/TiFlashMetrics.h +++ b/dbms/src/Common/TiFlashMetrics.h @@ -848,7 +848,11 @@ static_assert(RAFT_REGION_BIG_WRITE_THRES * 4 < RAFT_REGION_BIG_WRITE_MAX, "Inva F(type_fg_read, {"type", "fg_read"}), \ F(type_bg_read, {"type", "bg_read"}), \ F(type_fg_write, {"type", "fg_write"}), \ - F(type_bg_write, {"type", "bg_write"})) + F(type_bg_write, {"type", "bg_write"})) \ + M(tiflash_read_thread_internal_us, \ + "Durations of read thread internal components", \ + Histogram, \ + F(type_block_queue_pop_latency, {{"type", "block_queue_pop_latency"}}, ExpBuckets{1, 2, 20})) /// Buckets with boundaries [start * base^0, start * base^1, ..., start * base^(size-1)] diff --git a/dbms/src/Storages/DeltaMerge/ReadThread/WorkQueue.h b/dbms/src/Storages/DeltaMerge/ReadThread/WorkQueue.h index 039bb9f4ffb..048bbd38a6a 100644 --- a/dbms/src/Storages/DeltaMerge/ReadThread/WorkQueue.h +++ b/dbms/src/Storages/DeltaMerge/ReadThread/WorkQueue.h @@ -32,7 +32,8 @@ class WorkQueue std::condition_variable writer_cv; std::condition_variable finish_cv; PipeConditionVariable pipe_cv; - std::queue queue; + // + std::queue> queue; bool done; std::size_t max_size; @@ -49,6 +50,12 @@ class WorkQueue return queue.size() >= max_size; } + static void reportPopLatency(UInt64 push_timestamp_ns) + { + auto latency_ns = clock_gettime_ns_adjusted(push_timestamp_ns) - push_timestamp_ns; + GET_METRIC(tiflash_read_thread_internal_us, type_block_queue_pop_latency).Observe(latency_ns / 1000.0); + } + public: /** * Constructs an empty work queue with an optional max size. @@ -99,7 +106,7 @@ class WorkQueue { return false; } - queue.push(std::forward(item)); + queue.push(std::make_pair(std::forward(item), clock_gettime_ns())); peak_queue_size = std::max(queue.size(), peak_queue_size); if (size != nullptr) { @@ -121,6 +128,7 @@ class WorkQueue */ bool pop(T & item) { + UInt64 push_timestamp_ns = 0; { std::unique_lock lock(mu); ++pop_times; @@ -134,10 +142,12 @@ class WorkQueue assert(done); return false; } - item = std::move(queue.front()); + item = std::move(queue.front().first); + push_timestamp_ns = queue.front().second; queue.pop(); } writer_cv.notify_one(); + reportPopLatency(push_timestamp_ns); return true; } @@ -152,6 +162,7 @@ class WorkQueue */ bool tryPop(T & item) { + UInt64 push_timestamp_ns = 0; { std::lock_guard lock(mu); ++pop_times; @@ -167,10 +178,12 @@ class WorkQueue return false; } } - item = std::move(queue.front()); + item = std::move(queue.front().first); + push_timestamp_ns = queue.front().second; queue.pop(); } writer_cv.notify_one(); + reportPopLatency(push_timestamp_ns); return true; } diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index 2e163e02967..99f12b7a7aa 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -8609,6 +8609,134 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 165 + }, + "hiddenSeries": false, + "id": 292, + "legend": { + "alignAsTable": false, + "avg": true, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.17", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.999, sum(rate(tiflash_read_thread_internal_us_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "999-{{type}}", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(tiflash_read_thread_internal_us_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "99-{{type}}", + "refId": "B" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.95, sum(rate(tiflash_read_thread_internal_us_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "95-{{type}}", + "refId": "C" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.80, sum(rate(tiflash_read_thread_internal_us_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "80-{{type}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read Thread Internal Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:222", + "format": "µs", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:223", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false,